@@ -63,22 +63,93 @@ func optimizeContentStreamUsage(ctx *model.Context, sd *types.StreamDict, objNr
6363 return nil , nil
6464}
6565
66+ func removeEmptyContentStreams (ctx * model.Context , pageDict types.Dict , obj types.Object , pageObjNumber int ) error {
67+ var contentArr types.Array
68+
69+ if ir , ok := obj .(types.IndirectRef ); ok {
70+
71+ objNr := ir .ObjectNumber .Value ()
72+ entry , found := ctx .FindTableEntry (objNr , ir .GenerationNumber .Value ())
73+ if ! found {
74+ return errors .Errorf ("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n " , pageObjNumber )
75+ }
76+
77+ contentStreamDict , ok := entry .Object .(types.StreamDict )
78+ if ok {
79+ if err := contentStreamDict .Decode (); err != nil {
80+ return err
81+ }
82+ if len (contentStreamDict .Content ) == 0 {
83+ pageDict .Delete ("Contents" )
84+ }
85+ return nil
86+ }
87+
88+ contentArr , ok = entry .Object .(types.Array )
89+ if ! ok {
90+ return errors .Errorf ("removeEmptyContentStreams: obj#:%d page content entry neither stream dict nor array.\n " , pageObjNumber )
91+ }
92+
93+ } else if contentArr , ok = obj .(types.Array ); ! ok {
94+ return errors .Errorf ("removeEmptyContentStreams: obj#:%d corrupt page content array\n " , pageObjNumber )
95+ }
96+
97+ var newContentArr types.Array
98+
99+ for _ , c := range contentArr {
100+
101+ ir , ok := c .(types.IndirectRef )
102+ if ! ok {
103+ return errors .Errorf ("removeEmptyContentStreams: obj#:%d corrupt page content array entry\n " , pageObjNumber )
104+ }
105+
106+ objNr := ir .ObjectNumber .Value ()
107+ entry , found := ctx .FindTableEntry (objNr , ir .GenerationNumber .Value ())
108+ if ! found {
109+ return errors .Errorf ("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n " , pageObjNumber )
110+ }
111+
112+ contentStreamDict , ok := entry .Object .(types.StreamDict )
113+ if ! ok {
114+ return errors .Errorf ("identifyPageContent: obj#:%d page content entry is no stream dict\n " , pageObjNumber )
115+ }
116+
117+ if err := contentStreamDict .Decode (); err != nil {
118+ return err
119+ }
120+ if len (contentStreamDict .Content ) > 0 {
121+ newContentArr = append (newContentArr , c )
122+ }
123+ }
124+
125+ pageDict ["Contents" ] = newContentArr
126+
127+ return nil
128+ }
129+
66130func optimizePageContent (ctx * model.Context , pageDict types.Dict , pageObjNumber int ) error {
67- if ! ctx .OptimizeDuplicateContentStreams {
131+ o , found := pageDict .Find ("Contents" )
132+ if ! found {
68133 return nil
69134 }
70- if log .OptimizeEnabled () {
71- log .Optimize .Println ("identifyPageContent begin" )
135+
136+ if err := removeEmptyContentStreams (ctx , pageDict , o , pageObjNumber ); err != nil {
137+ return err
72138 }
73139
74- o , found : = pageDict .Find ("Contents" )
140+ o , found = pageDict .Find ("Contents" )
75141 if ! found {
76- if log .OptimizeEnabled () {
77- log .Optimize .Println ("identifyPageContent end: no \" Contents\" " )
78- }
79142 return nil
80143 }
81144
145+ if ! ctx .OptimizeDuplicateContentStreams {
146+ return nil
147+ }
148+
149+ if log .OptimizeEnabled () {
150+ log .Optimize .Println ("identifyPageContent begin" )
151+ }
152+
82153 var contentArr types.Array
83154
84155 if ir , ok := o .(types.IndirectRef ); ok {
@@ -685,7 +756,7 @@ func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNumber, pag
685756 return nil
686757}
687758
688- // Iterate over all pages and optimize resources.
759+ // Iterate over all pages and optimize content & resources.
689760func parsePagesDict (ctx * model.Context , pagesDict types.Dict , pageNumber int ) (int , error ) {
690761 // TODO Integrate resource consolidation based on content stream requirements.
691762
0 commit comments