-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Fix rare scenario with lazy parser, dictionary, and repcodes #2131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
301a62f
1185dfb
80d3585
4b88bd3
6d687a8
4e05159
3c1eba4
f800e72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -662,10 +662,14 @@ ZSTD_compressBlock_lazy_generic( | |
| 0; | ||
| const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); | ||
|
|
||
| DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); | ||
|
|
||
| /* init */ | ||
| ip += (dictAndPrefixLength == 0); | ||
| if (dictMode == ZSTD_noDict) { | ||
| U32 const maxRep = (U32)(ip - prefixLowest); | ||
| U32 const current = (U32)(ip - base); | ||
| U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); | ||
| U32 const maxRep = current - windowLow; | ||
| if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; | ||
| if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; | ||
| } | ||
|
|
@@ -929,11 +933,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| const BYTE* const ilimit = iend - 8; | ||
| const BYTE* const base = ms->window.base; | ||
| const U32 dictLimit = ms->window.dictLimit; | ||
| const U32 lowestIndex = ms->window.lowLimit; | ||
| const BYTE* const prefixStart = base + dictLimit; | ||
| const BYTE* const dictBase = ms->window.dictBase; | ||
| const BYTE* const dictEnd = dictBase + dictLimit; | ||
| const BYTE* const dictStart = dictBase + lowestIndex; | ||
| const BYTE* const dictStart = dictBase + ms->window.lowLimit; | ||
| const U32 windowLog = ms->cParams.windowLog; | ||
|
|
||
| typedef size_t (*searchMax_f)( | ||
| ZSTD_matchState_t* ms, | ||
|
|
@@ -942,6 +946,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
|
|
||
| U32 offset_1 = rep[0], offset_2 = rep[1]; | ||
|
|
||
| DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); | ||
|
|
||
| /* init */ | ||
| ip += (ip == prefixStart); | ||
|
|
||
|
|
@@ -953,10 +959,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| U32 current = (U32)(ip-base); | ||
|
|
||
| /* check repCode */ | ||
| { const U32 repIndex = (U32)(current+1 - offset_1); | ||
| { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor :
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could "invalidate" bad repcodes at the beginning and set them to zero. Then we'd just need to compare against zero. But that is a slightly larger code change. |
||
| const U32 repIndex = (U32)(current+1 - offset_1); | ||
| const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | ||
| const BYTE* const repMatch = repBase + repIndex; | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ | ||
| if (MEM_read32(ip+1) == MEM_read32(repMatch)) { | ||
| /* repcode detected we should take it */ | ||
| const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; | ||
|
|
@@ -983,10 +990,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| current++; | ||
| /* check repCode */ | ||
| if (offset) { | ||
| const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); | ||
| const U32 repIndex = (U32)(current - offset_1); | ||
| const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | ||
| const BYTE* const repMatch = repBase + repIndex; | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ | ||
| if (MEM_read32(ip) == MEM_read32(repMatch)) { | ||
| /* repcode detected */ | ||
| const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; | ||
|
|
@@ -1013,10 +1021,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| current++; | ||
| /* check repCode */ | ||
| if (offset) { | ||
| const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); | ||
| const U32 repIndex = (U32)(current - offset_1); | ||
| const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | ||
| const BYTE* const repMatch = repBase + repIndex; | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ | ||
| if (MEM_read32(ip) == MEM_read32(repMatch)) { | ||
| /* repcode detected */ | ||
| const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; | ||
|
|
@@ -1057,10 +1066,12 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
|
|
||
| /* check immediate repcode */ | ||
| while (ip <= ilimit) { | ||
| const U32 repIndex = (U32)((ip-base) - offset_2); | ||
| const U32 repCurrent = (U32)(ip-base); | ||
| const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); | ||
| const U32 repIndex = repCurrent - offset_2; | ||
| const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; | ||
| const BYTE* const repMatch = repBase + repIndex; | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ | ||
| if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ | ||
| if (MEM_read32(ip) == MEM_read32(repMatch)) { | ||
| /* repcode detected we should take it */ | ||
| const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -569,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches ( | |
| U32 repLen = 0; | ||
| assert(current >= dictLimit); | ||
| if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ | ||
| if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { | ||
| /* We must validate the repcode offset because when we're using a dictionary the | ||
| * valid offset range shrinks when the dictionary goes out of bounds. | ||
| */ | ||
| if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it be a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thats not necessary. We validate that it is within the prefix on line 571, so we know the memory is valid. This check just makes sure that the offset is allowed. |
||
| repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; | ||
| } | ||
| } else { /* repIndex < dictLimit || repIndex >= current */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Cyan4973 This is the change that causes the compressed size difference.
Before we were invalidating repcodes for all indices that are invalid by the end of the block.
Now we are only invalidating repcodes that are beyond our maximum offset. When we don't have a dictionary this is clearly okay. When we have a dictionary this is still okay, because we know the dictionary is valid for the entire block.
This shows up as a big difference for small window sizes because each block is exactly 1 window size, which means that the repcodes are never valid.
Do you agree?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, it took me a while, but I'm finally reading the code the same way.
In
maxRep = (U32)(ip - prefixLowest), the trick is thatprefixLowestwas determined relative to the end of the block, to ensure than any match found with an index>= prefixLowestis necessarily valid anywhere in the entire block.As a consequence,
prefixLowestcan be as close as the beginning of the block, when window size == block size, effectively invalidating all repcodes from previous block.The new code
maxRep = current - windowLowchecks more thoroughly vs the actual window size (and of course valid range within the prefix). So it effectively increases the range for which repcodes are valid, up to a maximum ofwindowSize, which is correct.Hence, with more opportunities to find repcode matches at the beginning of the block, it can translate into a few more bytes saved here and there.