Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,7 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
{
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
assert(params.fParams.contentSizeFlag == 1);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
}
Expand Down Expand Up @@ -4053,6 +4054,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
{ size_t oPos = 0;
size_t iPos = 0;
Expand Down
16 changes: 16 additions & 0 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,9 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}

/**
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
Expand All @@ -980,6 +983,19 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current
return matchLowest;
}

/**
* Returns the lowest allowed match index in the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.dictLimit;
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}



/* debug functions */
Expand Down
10 changes: 5 additions & 5 deletions lib/compress/zstd_double_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
/* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
Expand Down Expand Up @@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(

/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
}

/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
Comment on lines +111 to +113
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Cyan4973 This is the change that causes the compressed size difference.

Before we were invalidating repcodes for all indices that are invalid by the end of the block.

Now we are only invalidating repcodes that are beyond our maximum offset. When we don't have a dictionary this is clearly okay. When we have a dictionary this is still okay, because we know the dictionary is valid for the entire block.

This shows up as a big difference for small window sizes because each block is exactly 1 window size, which means that the repcodes are never valid.

Do you agree?

Copy link
Contributor

@Cyan4973 Cyan4973 May 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before we were invalidating repcodes for all indices that are invalid by the end of the block.

OK, it took me a while, but I'm finally reading the code the same way.

In maxRep = (U32)(ip - prefixLowest), the trick is that prefixLowest was determined relative to the end of the block, to ensure than any match found with an index >= prefixLowest is necessarily valid anywhere in the entire block.

As a consequence, prefixLowest can be as close as the beginning of the block, when window size == block size, effectively invalidating all repcodes from previous block.
The new code maxRep = current - windowLow checks more thoroughly vs the actual window size (and of course valid range within the prefix). So it effectively increases the range for which repcodes are valid, up to a maximum of windowSize, which is correct.

Hence, with more opportunities to find repcode matches at the beginning of the block, it can translate into a few more bytes saved here and there.

if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
Expand Down
8 changes: 4 additions & 4 deletions lib/compress/zstd_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic(
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
Expand All @@ -74,7 +72,9 @@ ZSTD_compressBlock_fast_generic(
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{ U32 const maxRep = (U32)(ip0 - prefixStart);
{ U32 const current = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
Expand Down
29 changes: 20 additions & 9 deletions lib/compress/zstd_lazy.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,10 +662,14 @@ ZSTD_compressBlock_lazy_generic(
0;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
Expand Down Expand Up @@ -929,11 +933,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const U32 dictLimit = ms->window.dictLimit;
const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
const U32 windowLog = ms->cParams.windowLog;

typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
Expand All @@ -942,6 +946,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(

U32 offset_1 = rep[0], offset_2 = rep[1];

DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");

/* init */
ip += (ip == prefixStart);

Expand All @@ -953,10 +959,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 current = (U32)(ip-base);

/* check repCode */
{ const U32 repIndex = (U32)(current+1 - offset_1);
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor :
I wonder why it's necessary to invoke ZSTD_getLowestMatchIndex() at each position,
as opposed to a constant calculated once ?
I presume this may cost performance (though it's arguably less important for the _extDict() variant).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could "invalidate" bad repcodes at the beginning and set them to zero. Then we'd just need to compare against zero. But that is a slightly larger code change.

const U32 repIndex = (U32)(current+1 - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Expand All @@ -983,10 +990,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
current++;
/* check repCode */
if (offset) {
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Expand All @@ -1013,10 +1021,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
current++;
/* check repCode */
if (offset) {
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Expand Down Expand Up @@ -1057,10 +1066,12 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(

/* check immediate repcode */
while (ip <= ilimit) {
const U32 repIndex = (U32)((ip-base) - offset_2);
const U32 repCurrent = (U32)(ip-base);
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
const U32 repIndex = repCurrent - offset_2;
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Expand Down
5 changes: 4 additions & 1 deletion lib/compress/zstd_opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 repLen = 0;
assert(current >= dictLimit);
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
/* We must validate the repcode offset because when we're using a dictionary the
* valid offset range shrinks when the dictionary goes out of bounds.
*/
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be a && instead of &, as a way to guarantee that access at ip - repOffset is valid ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats not necessary. We validate that it is within the prefix on line 571, so we know the memory is valid. This check just makes sure that the offset is allowed.

repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
}
} else { /* repIndex < dictLimit || repIndex >= current */
Expand Down
4 changes: 4 additions & 0 deletions lib/decompress/zstd_ddict.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
dctx->virtualStart = ddict->dictContent;
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dctx->previousDstEnd = dctx->dictEnd;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentBeginForFuzzing = dctx->prefixStart;
dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
if (ddict->entropyPresent) {
dctx->litEntropy = 1;
dctx->fseEntropy = 1;
Expand Down
7 changes: 7 additions & 0 deletions lib/decompress/zstd_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->oversizedDuration = 0;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
dctx->outBufferMode = ZSTD_obm_buffered;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentEndForFuzzing = NULL;
#endif
}

ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
Expand Down Expand Up @@ -1039,6 +1042,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
dctx->prefixStart = dict;
dctx->previousDstEnd = (const char*)dict + dictSize;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentBeginForFuzzing = dctx->prefixStart;
dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
return 0;
}

Expand Down
Loading