Skip to content

Commit aff022f

Browse files
committed
Fix #835, Add config flags for optimization
1 parent 3282d8a commit aff022f

File tree

9 files changed

+65
-11
lines changed

9 files changed

+65
-11
lines changed

pkg/api/api.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,7 @@ func ReadValidateAndOptimize(rs io.ReadSeeker, conf *model.Configuration) (ctx *
162162
return nil, err
163163
}
164164

165-
// TODO add optimize flag to config.yml
166-
if ctx.Conf.Optimize {
165+
if conf.Cmd == model.OPTIMIZE || conf.Optimize {
167166
if err = OptimizeContext(ctx); err != nil {
168167
return nil, err
169168
}

pkg/api/optimize.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ func Optimize(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) error {
3535
if conf == nil {
3636
conf = model.NewDefaultConfiguration()
3737
}
38-
//conf.Cmd = model.OPTIMIZE
3938

4039
ctx, err := ReadValidateAndOptimize(rs, conf)
4140
if err != nil {

pkg/pdfcpu/model/configuration.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,12 @@ type Configuration struct {
227227
// Date format.
228228
DateFormat string
229229

230-
// TODO Add to config.yml
231-
// Optimize page resources.
230+
// Toggle optimization after reading and validating.
232231
Optimize bool
233232

233+
// Optimize page resources via content stream analysis.
234+
OptimizeResourceDicts bool
235+
234236
// Optimize duplicate content streams across pages.
235237
OptimizeDuplicateContentStreams bool
236238

@@ -334,6 +336,7 @@ func newDefaultConfiguration() *Configuration {
334336
TimestampFormat: "2006-01-02 15:04",
335337
DateFormat: "2006-01-02",
336338
Optimize: true,
339+
OptimizeResourceDicts: true,
337340
OptimizeDuplicateContentStreams: false,
338341
CreateBookmarks: true,
339342
NeedAppearances: false,
@@ -405,6 +408,7 @@ func (c Configuration) String() string {
405408
"TimestampFormat: %s\n"+
406409
"DateFormat:  %s\n"+
407410
"Optimize %t\n"+
411+
"OptimizeResourceDicts %t\n"+
408412
"OptimizeDuplicateContentStreams %t\n"+
409413
"CreateBookmarks %t\n"+
410414
"NeedAppearances %t\n",
@@ -425,6 +429,7 @@ func (c Configuration) String() string {
425429
c.TimestampFormat,
426430
c.DateFormat,
427431
c.Optimize,
432+
c.OptimizeResourceDicts,
428433
c.OptimizeDuplicateContentStreams,
429434
c.CreateBookmarks,
430435
c.NeedAppearances,

pkg/pdfcpu/model/parseConfig.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ type configuration struct {
4444
Units string `yaml:"units"` // Be flexible if version < v0.3.8
4545
TimestampFormat string `yaml:"timestampFormat"`
4646
DateFormat string `yaml:"dateFormat"`
47+
Optimize bool `yaml:"optimize"`
48+
OptimizeResourceDicts bool `yaml:"optimizeResourceDicts"`
4749
OptimizeDuplicateContentStreams bool `yaml:"optimizeDuplicateContentStreams"`
4850
CreateBookmarks bool `yaml:"createBookmarks"`
4951
NeedAppearances bool `yaml:"needAppearances"`
@@ -93,6 +95,8 @@ func loadedConfig(c configuration, configPath string) *Configuration {
9395

9496
conf.TimestampFormat = c.TimestampFormat
9597
conf.DateFormat = c.DateFormat
98+
conf.Optimize = c.Optimize
99+
conf.OptimizeResourceDicts = c.OptimizeResourceDicts
96100
conf.OptimizeDuplicateContentStreams = c.OptimizeDuplicateContentStreams
97101
conf.CreateBookmarks = c.CreateBookmarks
98102
conf.NeedAppearances = c.NeedAppearances

pkg/pdfcpu/model/parseConfig_js.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,24 @@ func handleDateFormat(v string, c *Configuration) error {
167167
return nil
168168
}
169169

170+
func handleOptimize(k, v string, c *Configuration) error {
171+
v = strings.ToLower(v)
172+
if v != "true" && v != "false" {
173+
return errors.Errorf("config key %s is boolean", k)
174+
}
175+
c.Optimize = v == "true"
176+
return nil
177+
}
178+
179+
func handleOptimizeResourceDicts(k, v string, c *Configuration) error {
180+
v = strings.ToLower(v)
181+
if v != "true" && v != "false" {
182+
return errors.Errorf("config key %s is boolean", k)
183+
}
184+
c.OptimizeResourceDicts = v == "true"
185+
return nil
186+
}
187+
170188
func handleOptimizeDuplicateContentStreams(k, v string, c *Configuration) error {
171189
v = strings.ToLower(v)
172190
if v != "true" && v != "false" {
@@ -246,6 +264,12 @@ func parseKeysPart2(k, v string, c *Configuration) error {
246264
case "dateFormat":
247265
return handleDateFormat(v, c)
248266

267+
case "optimize":
268+
return handleOptimize(k, v, c)
269+
270+
case "optimizeResourceDicts":
271+
return handleOptimizeResourceDicts(k, v, c)
272+
249273
case "optimizeDuplicateContentStreams":
250274
return handleOptimizeDuplicateContentStreams(k, v, c)
251275

pkg/pdfcpu/model/parseContent.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,23 @@ func skipBI(l *string, prn PageResourceNames) error {
143143
if i < 0 {
144144
return errBIExpressionCorrupt
145145
}
146+
token := s[:i]
147+
if token == "CS" || token == "ColorSpace" {
148+
s = s[i:]
149+
i, _ = positionToNextWhitespaceOrChar(s, "/")
150+
if i < 0 {
151+
return errBIExpressionCorrupt
152+
}
153+
s = s[1:]
154+
i, _ = positionToNextWhitespaceOrChar(s, "/")
155+
if i < 0 {
156+
return errBIExpressionCorrupt
157+
}
158+
name := s[:i]
159+
if !types.MemberOf(name, []string{"DeviceGray", "DeviceRGB", "DeviceCMYK", "Indexed", "G", "RGB", "CMYK", "I"}) {
160+
prn["ColorSpace"][name] = true
161+
}
162+
}
146163
s = s[i:]
147164
continue
148165
}

pkg/pdfcpu/model/parseContent_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ func TestParseContent(t *testing.T) {
2626
Span<</ActualText<FEFF0020002E>>>, Span<</ActualText<FEFF002E>>>, Span<</ActualText<FEFF00090009>>> BDC
2727
/a1 BMC/a2 MP /a3 /MC0 BDC/P0 scn/RelativeColorimetric ri/P1 SCN/GS0 gs[(Q[i,j]/2.)16.6(The/]maxi\)-)]TJ/CS1 CS/a4<</A<FEFF>>> BDC /a5 <</A<FEFF>>>
2828
BDC (0.5*\(1/8\)*64 or +/4.\))Tj/T1_0 1 Tf <00150015> Tj /Im5 Do/a5 << /A <FEFF> >> BDC/a6/MC1 DP /a7<<>>DP
29-
BI /IM true/W 1/CS/InlineCS/H 1/BPC 1 ID EI Q /Pattern cs/Span<</ActualText<FEFF0009>>> BDC/SH1 sh`
29+
BI /IM true/W 1/CS/CS2/H 1/BPC 1 ID EI Q /Pattern cs/Span<</ActualText<FEFF0009>>> BDC/SH1 sh`
3030

3131
want := NewPageResourceNames()
3232
want["ColorSpace"]["CS0"] = true
3333
want["ColorSpace"]["CS1"] = true
34+
want["ColorSpace"]["CS2"] = true
3435
want["ExtGState"]["GS0"] = true
3536
want["Font"]["T1_0"] = true
3637
want["Pattern"]["P0"] = true

pkg/pdfcpu/model/resources/config.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ decodeAllStreams: false
1414
# ValidationRelaxed,
1515
validationMode: ValidationRelaxed
1616

17-
# validate cross reference table right before writing
17+
# validate cross reference table right before writing.
1818
postProcessValidate: true
1919

2020
# eol for writing:
@@ -52,10 +52,16 @@ timestampFormat: 2006-01-02 15:04
5252
# date format: yyyy-mm-dd
5353
dateFormat: 2006-01-02
5454

55-
# optimize duplicate content streams across pages
55+
# toggle optimization.
56+
optimize: true
57+
58+
# optimize page resources via content stream analysis.
59+
optimizeResourceDicts: true
60+
61+
# optimize duplicate content streams across pages.
5662
optimizeDuplicateContentStreams: false
5763

58-
# merge creates bookmarks
64+
# merge creates bookmarks.
5965
createBookmarks: true
6066

6167
# Viewer is expected to supply appearance streams for form fields.

pkg/pdfcpu/optimize.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,8 +1443,7 @@ func OptimizeXRefTable(ctx *model.Context) error {
14431443
return err
14441444
}
14451445

1446-
if ctx.Cmd == model.OPTIMIZE {
1447-
// Consolidate resource dicts.
1446+
if ctx.Cmd == model.OPTIMIZE && ctx.Conf.OptimizeResourceDicts {
14481447
// Extra step with potential for performance hit when processing large files.
14491448
if err := optimizeResourceDicts(ctx); err != nil {
14501449
return err

0 commit comments

Comments
 (0)