Skip to content

Commit 97e8583

Browse files
committed
matchfinder.M4: some refinements to scoring
1 parent 17e5901 commit 97e8583

File tree

3 files changed

+43
-39
lines changed

3 files changed

+43
-39
lines changed

brotli_test.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -674,51 +674,55 @@ func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSi
674674
}
675675

676676
func TestEncodeM4(t *testing.T) {
677-
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 57}, 1<<16)
677+
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 66}, 1<<16)
678+
}
679+
680+
func TestEncodeM4Chain256(t *testing.T) {
681+
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 66, ChainLength: 256}, 1<<16)
678682
}
679683

680684
func BenchmarkEncodeM4(b *testing.B) {
681-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, DistanceBitCost: 57}, 1<<16)
685+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, DistanceBitCost: 66}, 1<<16)
682686
}
683687

684688
func TestEncodeM4Chain1(t *testing.T) {
685-
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
689+
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, DistanceBitCost: 66}, 1<<16)
686690
}
687691

688692
func BenchmarkEncodeM4Chain1(b *testing.B) {
689-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
693+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, DistanceBitCost: 66}, 1<<16)
690694
}
691695

692696
func BenchmarkEncodeM4Chain2(b *testing.B) {
693-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, DistanceBitCost: 57}, 1<<16)
697+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, DistanceBitCost: 66}, 1<<16)
694698
}
695699

696700
func BenchmarkEncodeM4Chain4(b *testing.B) {
697-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, DistanceBitCost: 57}, 1<<16)
701+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, DistanceBitCost: 66}, 1<<16)
698702
}
699703

700704
func BenchmarkEncodeM4Chain8(b *testing.B) {
701-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, DistanceBitCost: 57}, 1<<16)
705+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, DistanceBitCost: 66}, 1<<16)
702706
}
703707

704708
func BenchmarkEncodeM4Chain16(b *testing.B) {
705-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, DistanceBitCost: 57}, 1<<16)
709+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, DistanceBitCost: 66}, 1<<16)
706710
}
707711

708712
func BenchmarkEncodeM4Chain32(b *testing.B) {
709-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, DistanceBitCost: 57}, 1<<16)
713+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, DistanceBitCost: 66}, 1<<16)
710714
}
711715

712716
func BenchmarkEncodeM4Chain64(b *testing.B) {
713-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, DistanceBitCost: 57}, 1<<16)
717+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, DistanceBitCost: 66}, 1<<16)
714718
}
715719

716720
func BenchmarkEncodeM4Chain128(b *testing.B) {
717-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16)
721+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 66}, 1<<16)
718722
}
719723

720724
func BenchmarkEncodeM4Chain256(b *testing.B) {
721-
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 57}, 1<<16)
725+
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 66}, 1<<16)
722726
}
723727

724728
func TestEncodeM0(t *testing.T) {

matchfinder/emitter.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,3 @@ func (e *matchEmitter) emit(m absoluteMatch) {
3232
})
3333
e.NextEmit = m.End
3434
}
35-
36-
// trim shortens m if it extends past maxEnd. Then if the length is at least
37-
// minLength, the match is emitted.
38-
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
39-
if m.End > maxEnd {
40-
m.End = maxEnd
41-
}
42-
if m.End-m.Start >= minLength {
43-
e.emit(m)
44-
}
45-
}

matchfinder/m4.go

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func (q *M4) Reset() {
5656
}
5757

5858
func (q *M4) score(m absoluteMatch) int {
59-
return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost
59+
return (m.End-m.Start)*256 + (bits.LeadingZeros32(uint32(m.Start-m.Match))-32)*q.DistanceBitCost
6060
}
6161

6262
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
@@ -112,7 +112,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
112112
// We have found some matches, and we're far enough along that we probably
113113
// won't find overlapping matches, so we might as well emit them.
114114
if matches[1] != (absoluteMatch{}) {
115-
e.trim(matches[1], matches[0].Start, q.MinLength)
115+
if matches[1].End > matches[0].Start {
116+
matches[1].End = matches[0].Start
117+
}
118+
if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
119+
e.emit(matches[1])
120+
}
116121
}
117122
e.emit(matches[0])
118123
matches = [3]absoluteMatch{}
@@ -139,12 +144,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
139144
// Look for a match.
140145
var currentMatch absoluteMatch
141146

142-
if i-candidate != matches[0].Start-matches[0].Match {
143-
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
144-
m := extendMatch2(src, i, candidate, e.NextEmit)
145-
if m.End-m.Start > q.MinLength {
146-
currentMatch = m
147-
}
147+
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
148+
m := extendMatch2(src, i, candidate, e.NextEmit)
149+
if m.End-m.Start > q.MinLength && q.score(m) > 0 {
150+
currentMatch = m
148151
}
149152
}
150153

@@ -157,12 +160,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
157160
if candidate <= 0 || i-candidate > q.MaxDistance {
158161
break
159162
}
160-
if i-candidate != matches[0].Start-matches[0].Match {
161-
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
162-
m := extendMatch2(src, i, candidate, e.NextEmit)
163-
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
164-
currentMatch = m
165-
}
163+
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
164+
m := extendMatch2(src, i, candidate, e.NextEmit)
165+
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
166+
currentMatch = m
166167
}
167168
}
168169
}
@@ -217,14 +218,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
217218

218219
default:
219220
// Emit the first match, shortening it if necessary to avoid overlap with the second.
220-
e.trim(matches[2], matches[1].Start, q.MinLength)
221+
if matches[2].End > matches[1].Start {
222+
matches[2].End = matches[1].Start
223+
}
224+
if matches[2].End-matches[2].Start >= q.MinLength && q.score(matches[2]) > 0 {
225+
e.emit(matches[2])
226+
}
221227
matches[2] = absoluteMatch{}
222228
}
223229
}
224230

225231
// We've found all the matches now; emit the remaining ones.
226232
if matches[1] != (absoluteMatch{}) {
227-
e.trim(matches[1], matches[0].Start, q.MinLength)
233+
if matches[1].End > matches[0].Start {
234+
matches[1].End = matches[0].Start
235+
}
236+
if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
237+
e.emit(matches[1])
238+
}
228239
}
229240
if matches[0] != (absoluteMatch{}) {
230241
e.emit(matches[0])

0 commit comments

Comments
 (0)