From c208de77a630b4eceec265f8371cf236972c0c03 Mon Sep 17 00:00:00 2001 From: Ivan Kruglov Date: Wed, 8 May 2019 14:56:07 +0200 Subject: [PATCH] Perl: upgrade zstd to 1.4.0 --- Perl/shared/zstd/Makefile.PL | 2 +- Perl/shared/zstd/common/bitstream.h | 231 +- Perl/shared/zstd/common/compiler.h | 140 + Perl/shared/zstd/common/cpu.h | 215 + Perl/shared/zstd/common/debug.c | 44 + Perl/shared/zstd/common/debug.h | 134 + Perl/shared/zstd/common/entropy_common.c | 49 +- Perl/shared/zstd/common/error_private.c | 33 +- Perl/shared/zstd/common/error_private.h | 16 +- Perl/shared/zstd/common/fse.h | 178 +- Perl/shared/zstd/common/fse_decompress.c | 34 +- Perl/shared/zstd/common/huf.h | 324 +- Perl/shared/zstd/common/mem.h | 98 +- Perl/shared/zstd/common/pool.c | 340 + Perl/shared/zstd/common/pool.h | 84 + Perl/shared/zstd/common/threading.c | 75 + Perl/shared/zstd/common/threading.h | 123 + Perl/shared/zstd/common/xxhash.c | 59 +- Perl/shared/zstd/common/xxhash.h | 26 +- Perl/shared/zstd/common/zstd_common.c | 68 +- Perl/shared/zstd/common/zstd_errors.h | 97 +- Perl/shared/zstd/common/zstd_internal.h | 293 +- Perl/shared/zstd/compress/fse_compress.c | 361 +- Perl/shared/zstd/compress/hist.c | 203 + Perl/shared/zstd/compress/hist.h | 95 + Perl/shared/zstd/compress/huf_compress.c | 333 +- Perl/shared/zstd/compress/zstd_compress.c | 6132 ++++++++++------- .../zstd/compress/zstd_compress_internal.h | 863 +++ Perl/shared/zstd/compress/zstd_double_fast.c | 499 ++ Perl/shared/zstd/compress/zstd_double_fast.h | 38 + Perl/shared/zstd/compress/zstd_fast.c | 470 ++ Perl/shared/zstd/compress/zstd_fast.h | 37 + Perl/shared/zstd/compress/zstd_lazy.c | 1106 +++ Perl/shared/zstd/compress/zstd_lazy.h | 67 + Perl/shared/zstd/compress/zstd_ldm.c | 597 ++ Perl/shared/zstd/compress/zstd_ldm.h | 105 + Perl/shared/zstd/compress/zstd_opt.c | 1233 ++++ Perl/shared/zstd/compress/zstd_opt.h | 945 +-- Perl/shared/zstd/compress/zstdmt_compress.c | 2099 ++++++ Perl/shared/zstd/compress/zstdmt_compress.h | 191 + Perl/shared/zstd/decompress/huf_decompress.c | 887 ++- Perl/shared/zstd/decompress/zstd_ddict.c | 240 + Perl/shared/zstd/decompress/zstd_ddict.h | 44 + Perl/shared/zstd/decompress/zstd_decompress.c | 2629 +++---- .../zstd/decompress/zstd_decompress_block.c | 1311 ++++ .../zstd/decompress/zstd_decompress_block.h | 59 + .../decompress/zstd_decompress_internal.h | 175 + Perl/shared/zstd/zstd.h | 1926 +++++- 48 files changed, 18859 insertions(+), 6449 deletions(-) create mode 100644 Perl/shared/zstd/common/compiler.h create mode 100644 Perl/shared/zstd/common/cpu.h create mode 100644 Perl/shared/zstd/common/debug.c create mode 100644 Perl/shared/zstd/common/debug.h create mode 100644 Perl/shared/zstd/common/pool.c create mode 100644 Perl/shared/zstd/common/pool.h create mode 100644 Perl/shared/zstd/common/threading.c create mode 100644 Perl/shared/zstd/common/threading.h create mode 100644 Perl/shared/zstd/compress/hist.c create mode 100644 Perl/shared/zstd/compress/hist.h create mode 100644 Perl/shared/zstd/compress/zstd_compress_internal.h create mode 100644 Perl/shared/zstd/compress/zstd_double_fast.c create mode 100644 Perl/shared/zstd/compress/zstd_double_fast.h create mode 100644 Perl/shared/zstd/compress/zstd_fast.c create mode 100644 Perl/shared/zstd/compress/zstd_fast.h create mode 100644 Perl/shared/zstd/compress/zstd_lazy.c create mode 100644 Perl/shared/zstd/compress/zstd_lazy.h create mode 100644 Perl/shared/zstd/compress/zstd_ldm.c create mode 100644 Perl/shared/zstd/compress/zstd_ldm.h create mode 100644 Perl/shared/zstd/compress/zstd_opt.c create mode 100644 Perl/shared/zstd/compress/zstdmt_compress.c create mode 100644 Perl/shared/zstd/compress/zstdmt_compress.h create mode 100644 Perl/shared/zstd/decompress/zstd_ddict.c create mode 100644 Perl/shared/zstd/decompress/zstd_ddict.h create mode 100644 Perl/shared/zstd/decompress/zstd_decompress_block.c create mode 100644 Perl/shared/zstd/decompress/zstd_decompress_block.h create mode 100644 Perl/shared/zstd/decompress/zstd_decompress_internal.h diff --git a/Perl/shared/zstd/Makefile.PL b/Perl/shared/zstd/Makefile.PL index 83731b4f4..105012451 100644 --- a/Perl/shared/zstd/Makefile.PL +++ b/Perl/shared/zstd/Makefile.PL @@ -57,6 +57,6 @@ lib: libzstd.o clean: $(RM) core *.o *.a *.gcda - $(RM) decompress/*.o + $(RM) common/*.o compress/*.o decompress/*.o @echo Cleaning zstd library completed }; diff --git a/Perl/shared/zstd/common/bitstream.h b/Perl/shared/zstd/common/bitstream.h index 3a45244f8..d955bd677 100644 --- a/Perl/shared/zstd/common/bitstream.h +++ b/Perl/shared/zstd/common/bitstream.h @@ -1,8 +1,7 @@ /* ****************************************************************** bitstream Part of FSE library - header file (to include) - Copyright (C) 2013-2016, Yann Collet. + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -39,7 +38,6 @@ extern "C" { #endif - /* * This API consists of small unitary functions, which must be inlined for best performance. * Since link-time-optimization is not available for all compilers, @@ -50,6 +48,7 @@ extern "C" { * Dependencies ******************************************/ #include "mem.h" /* unaligned access routines */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ @@ -60,18 +59,21 @@ extern "C" { # include /* support for bextr (experimental) */ #endif +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + /*-****************************************** * bitStream encoding API (write forward) ********************************************/ /* bitStream can mix input from multiple sources. -* A critical property of these streams is that they encode and decode in **reverse** direction. -* So the first bit sequence you add will be the last to be read, like a LIFO stack. -*/ -typedef struct -{ + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { size_t bitContainer; - int bitPos; + unsigned bitPos; char* startPtr; char* ptr; char* endPtr; @@ -103,12 +105,12 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); /*-******************************************** * bitStream decoding API (read backward) **********************************************/ -typedef struct -{ +typedef struct { size_t bitContainer; unsigned bitsConsumed; const char* ptr; const char* start; + const char* limitPtr; } BIT_DStream_t; typedef enum { BIT_DStream_unfinished = 0, @@ -151,140 +153,179 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /*-************************************************************** * Internal functions ****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (register U32 val) +MEM_STATIC unsigned BIT_highbit32 (U32 val) { + assert(val != 0); + { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse ( &r, val ); - return (unsigned) r; + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return 31 - __builtin_clz (val); # else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; # endif + } } /*===== Local Constants =====*/ -static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) /*-************************************************************** * bitStream encoding ****************************************************************/ /*! BIT_initCStream() : - * `dstCapacity` must be > sizeof(void*) + * `dstCapacity` must be > sizeof(size_t) * @return : 0 if success, - otherwise an error code (can be tested using ERR_isError() ) */ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) { bitC->bitContainer = 0; bitC->bitPos = 0; bitC->startPtr = (char*)startPtr; bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); - if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); return 0; } /*! BIT_addBits() : - can add up to 26 bits into `bitC`. - Does not check for register overflow ! */ -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) { + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; bitC->bitPos += nbBits; } /*! BIT_addBitsFast() : - * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) { + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); bitC->bitContainer |= value << bitC->bitPos; bitC->bitPos += nbBits; } /*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed * unsafe version; does not check buffer overflow */ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; + assert(bitC->ptr <= bitC->endPtr); bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ + bitC->bitContainer >>= nbBytes*8; } /*! BIT_flushBits() : + * assumption : bitContainer has not overflowed * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ + bitC->bitContainer >>= nbBytes*8; } /*! BIT_closeCStream() : * @return : size of CStream, in bytes, - or 0 if it could not fit into dstBuffer */ + * or 0 if it could not fit into dstBuffer */ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) { BIT_addBitsFast(bitC, 1, 1); /* endMark */ BIT_flushBits(bitC); - - if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ - + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); } /*-******************************************************** -* bitStream decoding +* bitStream decoding **********************************************************/ /*! BIT_initDStream() : -* Initialize a BIT_DStream_t. -* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcSize` must be the *exact* size of the bitStream, in bytes. -* @return : size of stream (== srcSize) or an errorCode if a problem is detected -*/ + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->start = (const char*)srcBuffer; bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); bitD->bitContainer = MEM_readLEST(bitD->ptr); { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } } else { - bitD->start = (const char*)srcBuffer; bitD->ptr = bitD->start; bitD->bitContainer = *(const BYTE*)(bitD->start); switch(srcSize) { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - default:; + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; } @@ -298,20 +339,15 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { -#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ -# if defined(__x86_64__) - if (sizeof(bitContainer)==8) - return _bextr_u64(bitContainer, start, nbBits); - else -# endif - return _bextr_u32(bitContainer, start, nbBits); -#else - return (bitContainer >> start) & BIT_mask[nbBits]; -#endif + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { + assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; } @@ -320,24 +356,28 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * local register is not modified. * On 32-bits, maxNbBits==24. * On 64-bits, maxNbBits==56. - * @return : value extracted - */ - MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) + * @return : value extracted */ +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); #else - U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); #endif } /*! BIT_lookBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ + * unsafe version; only works if nbBits >= 1 */ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) { - U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) @@ -348,9 +388,8 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) /*! BIT_readBits() : * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. - */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) + * @return : extracted value. */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -358,25 +397,26 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) } /*! BIT_readBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); BIT_skipBits(bitD, nbBits); return value; } /*! BIT_reloadDStream() : -* Refill `bitD` from buffer previously set in BIT_initDStream() . -* This function is safe, it guarantees it will not read beyond src buffer. -* @return : status of `BIT_DStream_t` internal register. - if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */ - return BIT_DStream_overflow; + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { + if (bitD->ptr >= bitD->limitPtr) { bitD->ptr -= bitD->bitsConsumed >> 3; bitD->bitsConsumed &= 7; bitD->bitContainer = MEM_readLEST(bitD->ptr); @@ -386,6 +426,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; return BIT_DStream_completed; } + /* start < ptr < limitPtr */ { U32 nbBytes = bitD->bitsConsumed >> 3; BIT_DStream_status result = BIT_DStream_unfinished; if (bitD->ptr - nbBytes < bitD->start) { @@ -394,14 +435,14 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) } bitD->ptr -= nbBytes; bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ return result; } } /*! BIT_endOfDStream() : -* @return Tells if DStream has exactly reached its end (all bits consumed). -*/ + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) { return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); diff --git a/Perl/shared/zstd/common/compiler.h b/Perl/shared/zstd/common/compiler.h new file mode 100644 index 000000000..0836e3ed2 --- /dev/null +++ b/Perl/shared/zstd/common/compiler.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# ifdef __GNUC__ +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +#endif /* ZSTD_COMPILER_H */ diff --git a/Perl/shared/zstd/common/cpu.h b/Perl/shared/zstd/common/cpu.h new file mode 100644 index 000000000..5f0923fc9 --- /dev/null +++ b/Perl/shared/zstd/common/cpu.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2018-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +#include + +#include "mem.h" + +#ifdef _MSC_VER +#include +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/Perl/shared/zstd/common/debug.c b/Perl/shared/zstd/common/debug.c new file mode 100644 index 000000000..3ebdd1cb1 --- /dev/null +++ b/Perl/shared/zstd/common/debug.c @@ -0,0 +1,44 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +int g_debuglevel = DEBUGLEVEL; diff --git a/Perl/shared/zstd/common/debug.h b/Perl/shared/zstd/common/debug.h new file mode 100644 index 000000000..b4fc89d49 --- /dev/null +++ b/Perl/shared/zstd/common/debug.h @@ -0,0 +1,134 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* DEBUGFILE can be defined externally, + * typically through compiler command line. + * note : currently useless. + * Value must be stderr or stdout */ +#ifndef DEBUGFILE +# define DEBUGFILE stderr +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# include +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# include +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ diff --git a/Perl/shared/zstd/common/entropy_common.c b/Perl/shared/zstd/common/entropy_common.c index 83fd97154..b12944e1d 100644 --- a/Perl/shared/zstd/common/entropy_common.c +++ b/Perl/shared/zstd/common/entropy_common.c @@ -43,27 +43,21 @@ #include "huf.h" -/*-**************************************** -* FSE Error Management -******************************************/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } -const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } -/* ************************************************************** -* HUF Error Management -****************************************************************/ unsigned HUF_isError(size_t code) { return ERR_isError(code); } - const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -static short FSE_abs(short a) { return (short)(a<0 ? -a : a); } - size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) { @@ -78,7 +72,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t unsigned charnum = 0; int previous0 = 0; - if (hbSize < 4) return ERROR(srcSize_wrong); + if (hbSize < 4) { + /* This function only works when hbSize >= 4 */ + char buffer[4]; + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 4); + + /* init */ + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -111,27 +119,28 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); while (charnum < n0) normalizedCounter[charnum++] = 0; if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; bitStream = MEM_readLE32(ip) >> bitCount; } else { bitStream >>= 2; } } - { short const max = (short)((2*threshold-1)-remaining); - short count; + { int const max = (2*threshold-1) - remaining; + int count; if ((bitStream & (threshold-1)) < (U32)max) { - count = (short)(bitStream & (threshold-1)); - bitCount += nbBits-1; + count = bitStream & (threshold-1); + bitCount += nbBits-1; } else { - count = (short)(bitStream & (2*threshold-1)); + count = bitStream & (2*threshold-1); if (count >= threshold) count -= max; - bitCount += nbBits; + bitCount += nbBits; } count--; /* extra accuracy */ - remaining -= FSE_abs(count); - normalizedCounter[charnum++] = count; + remaining -= count < 0 ? -count : count; /* -1 means +1 */ + normalizedCounter[charnum++] = (short)count; previous0 = !count; while (remaining < threshold) { nbBits--; diff --git a/Perl/shared/zstd/common/error_private.c b/Perl/shared/zstd/common/error_private.c index a0fa1724a..7c1bb67a2 100644 --- a/Perl/shared/zstd/common/error_private.c +++ b/Perl/shared/zstd/common/error_private.c @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* The purpose of this file is to have a single list of error strings embedded in binary */ @@ -13,6 +14,10 @@ const char* ERR_getErrorString(ERR_enum code) { +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else static const char* const notErrorCode = "Unspecified error code"; switch( code ) { @@ -20,24 +25,30 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(GENERIC): return "Error (generic)"; case PREFIX(prefix_unknown): return "Unknown frame descriptor"; case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(parameter_unknown): return "Unknown parameter type"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size incorrect"; - case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(maxCode): default: return notErrorCode; } +#endif } diff --git a/Perl/shared/zstd/common/error_private.h b/Perl/shared/zstd/common/error_private.h index 1bc2e4954..0d2fa7e34 100644 --- a/Perl/shared/zstd/common/error_private.h +++ b/Perl/shared/zstd/common/error_private.h @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* Note : this module is expected to remain private, do not expose it */ @@ -48,10 +49,9 @@ typedef ZSTD_ErrorCode ERR_enum; /*-**************************************** * Error codes handling ******************************************/ -#ifdef ERROR -# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ -#endif -#define ERROR(name) ((size_t)-PREFIX(name)) +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } diff --git a/Perl/shared/zstd/common/fse.h b/Perl/shared/zstd/common/fse.h index 8b07d184d..811c670bd 100644 --- a/Perl/shared/zstd/common/fse.h +++ b/Perl/shared/zstd/common/fse.h @@ -31,13 +31,14 @@ You can contact the author at : - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ****************************************************************** */ -#ifndef FSE_H -#define FSE_H #if defined (__cplusplus) extern "C" { #endif +#ifndef FSE_H +#define FSE_H + /*-***************************************** * Dependencies @@ -45,6 +46,33 @@ extern "C" { #include /* size_t, ptrdiff_t */ +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + /*-**************************************** * FSE simple functions ******************************************/ @@ -56,8 +84,8 @@ extern "C" { if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. if FSE_isError(return), compression failed (more details using FSE_getErrorName()) */ -size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); /*! FSE_decompress(): Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', @@ -69,18 +97,18 @@ size_t FSE_compress(void* dst, size_t dstCapacity, Why ? : making this distinction requires a header. Header management is intentionally delegated to the user layer, which can better manage special cases. */ -size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); /*-***************************************** * Tool functions ******************************************/ -size_t FSE_compressBound(size_t size); /* maximum compressed size */ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ /* Error Management */ -unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ /*-***************************************** @@ -94,7 +122,7 @@ const char* FSE_getErrorName(size_t code); /* provides error code string (usef if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. if FSE_isError(return), it's an error code. */ -size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); /*-***************************************** @@ -102,7 +130,7 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize ******************************************/ /*! FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] +1. count symbol occurrence from source[] into table count[] (see hist.h) 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) 3. save normalized counters to memory buffer using writeNCount() 4. build encoding table 'CTable' from normalized counters @@ -120,57 +148,50 @@ or to save and provide normalized distribution using external method. /* *** COMPRESSION *** */ -/*! FSE_count(): - Provides the precise count of each byte within a table 'count'. - 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - *maxSymbolValuePtr will be updated if detected smaller than initial value. - @return : the count of the most frequent symbol (which is not identified). - if return == srcSize, there is only one symbol. - Can also return an error code, which can be tested with FSE_isError(). */ -size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. @return : recommended tableLog (necessarily <= 'maxTableLog') */ -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. Typically useful for allocation purpose. */ -size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); /*! FSE_writeNCount(): Compactly save 'normalizedCounter' into 'buffer'. @return : size of the compressed table, or an errorCode, which can be tested using FSE_isError(). */ -size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ -FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); -void FSE_freeCTable (FSE_CTable* ct); +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); /*! FSE_buildCTable(): Builds `ct`, which must be already allocated, using FSE_createCTable(). @return : 0, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_compress_usingCTable(): Compress `src` using `ct` into `dst` which must be already allocated. @return : size of compressed data (<= `dstCapacity`), or 0 if compressed data could not fit into `dst`, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); /*! Tutorial : @@ -223,25 +244,27 @@ If there is an error, the function will return an ErrorCode (which can be tested @return : size read from 'rBuffer', or an errorCode, which can be tested using FSE_isError(). maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_DTable* FSE_createDTable(unsigned tableLog); -void FSE_freeDTable(FSE_DTable* dt); +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); /*! FSE_buildDTable(): Builds 'dt', which must be already allocated, using FSE_createDTable(). return : 0, or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_decompress_usingDTable(): Decompress compressed source `cSrc` of size `cSrcSize` using `dt` into `dst` which must be already allocated. @return : size of regenerated data (necessarily <= `dstCapacity`), or an errorCode, which can be tested using FSE_isError() */ -size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); /*! Tutorial : @@ -271,8 +294,10 @@ FSE_decompress_usingDTable() result will tell how many bytes were regenerated (< If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) */ +#endif /* FSE_H */ -#ifdef FSE_STATIC_LINKING_ONLY +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY /* *** Dependency *** */ #include "bitstream.h" @@ -290,35 +315,14 @@ If there is an error, the function will return an error code, which can be teste #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= `1024` unsigned - */ -size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace); - -/** FSE_countFast() : - * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr - */ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` must be a table of minimum `1024` unsigned - */ -size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); - -/*! FSE_count_simple - * Same as FSE_countFast(), but does not use any additional memory (not even on stack). - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -*/ -size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) +/* ***************************************** + * FSE advanced API + ***************************************** */ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /**< same as FSE_optimalTableLog(), which used `minus==2` */ @@ -327,7 +331,7 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. */ -#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (1<<((maxTableLog>2)?(maxTableLog-2):0)) ) +#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); @@ -351,6 +355,11 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; /* ***************************************** * FSE symbol compression API @@ -503,7 +512,7 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) const U32 tableLog = MEM_read16(ptr); statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; - statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); statePtr->stateLog = tableLog; } @@ -522,11 +531,11 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3 } } -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) { - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); BIT_addBits(bitC, statePtr->value, nbBitsOut); statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; } @@ -538,6 +547,39 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt } +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + /* ====== Decompression ====== */ typedef struct { @@ -664,5 +706,3 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) #if defined (__cplusplus) } #endif - -#endif /* FSE_H */ diff --git a/Perl/shared/zstd/common/fse_decompress.c b/Perl/shared/zstd/common/fse_decompress.c index 40f9ed2de..72bbead5b 100644 --- a/Perl/shared/zstd/common/fse_decompress.c +++ b/Perl/shared/zstd/common/fse_decompress.c @@ -33,43 +33,23 @@ ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - /* ************************************************************** * Includes ****************************************************************/ #include /* malloc, free, qsort */ #include /* memcpy, memset */ -#include /* printf (debug) */ #include "bitstream.h" +#include "compiler.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" +#include "error_private.h" /* ************************************************************** * Error Management ****************************************************************/ #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ /* check and forward error code */ #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } @@ -159,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned { U32 u; for (u=0; u= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ if (tableLog > maxLog) return ERROR(tableLog_tooLarge); ip += NCountLength; cSrcSize -= NCountLength; diff --git a/Perl/shared/zstd/common/huf.h b/Perl/shared/zstd/common/huf.h index 9427ae8cb..6b572c448 100644 --- a/Perl/shared/zstd/common/huf.h +++ b/Perl/shared/zstd/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** - Huffman coder, part of New Generation Entropy library - header file - Copyright (C) 2013-2016, Yann Collet. + huff0 huffman codec, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -31,81 +31,114 @@ You can contact the author at : - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ****************************************************************** */ -#ifndef HUF_H_298734234 -#define HUF_H_298734234 #if defined (__cplusplus) extern "C" { #endif +#ifndef HUF_H_298734234 +#define HUF_H_298734234 /* *** Dependencies *** */ #include /* size_t */ -/* *** simple functions *** */ -/** -HUF_compress() : - Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. - 'dst' buffer must be already allocated. - Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. - @return : size of compressed data (<= `dstCapacity`). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single repeated byte symbol (RLE compression). - if HUF_isError(return), compression failed (more details using HUF_getErrorName()) -*/ -size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif -/** -HUF_decompress() : - Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', - into already allocated buffer 'dst', of minimum size 'dstSize'. - `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. - Note : in contrast with FSE, HUF_decompress can regenerate - RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, - because it knows size to regenerate. - @return : size of regenerated data (== originalSize), - or an error code, which can be tested using HUF_isError() -*/ -size_t HUF_decompress(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize); + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); /* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ /* Error Management */ -unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ /* *** Advanced function *** */ /** HUF_compress2() : - * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` . - * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ -size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); /** HUF_compress4X_wksp() : -* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ -size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */ - + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); +#endif /* HUF_H_298734234 */ -#ifdef HUF_STATIC_LINKING_ONLY +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY /* *** Dependencies *** */ #include "mem.h" /* U32 */ /* *** Constants *** */ -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ -#define HUF_SYMBOLVALUE_MAX 255 +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) # error "HUF_TABLELOG_MAX is too large !" #endif @@ -116,123 +149,210 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t s ******************************************/ /* HUF buffer bounds */ #define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* static allocation of HUF's Compression Table */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[maxSymbolValue+1]; \ + U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ void* name##hv = &(name##hb); \ HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } -#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } /* **************************************** * Advanced decompression functions ******************************************/ -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif /* **************************************** -* HUF detailed API -******************************************/ -/*! -HUF_compress() does the following: -1. count symbol occurrence from source[] into table count[] using FSE_count() -2. (optional) refine tableLog using HUF_optimalTableLog() -3. build Huffman table from count using HUF_buildCTable() -4. save Huffman table to memory buffer using HUF_writeCTable() -5. encode the data stream using HUF_compress4X_usingCTable() - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and regenerate 'CTable' using external methods. -*/ -/* FSE_count() : find it within "fse.h" */ + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. */ -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); /*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize); /** HUF_readCTable() : -* Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize); + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); /* -HUF_decompress() does the following: -1. select the decompression algorithm (X2, X4) based on pre-computed heuristics -2. build Huffman table from save, using HUF_readDTableXn() -3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable -*/ + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ /** HUF_selectDecoder() : -* Tells which decoder is likely to decode faster, -* based on a set of pre-determined metrics. -* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +/* ====================== */ /* single stream variants */ +/* ====================== */ size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */ +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif -#endif /* HUF_STATIC_LINKING_ONLY */ +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif /* HUF_STATIC_LINKING_ONLY */ #if defined (__cplusplus) } #endif - -#endif /* HUF_H_298734234 */ diff --git a/Perl/shared/zstd/common/mem.h b/Perl/shared/zstd/common/mem.h index 32c63dd17..5da248756 100644 --- a/Perl/shared/zstd/common/mem.h +++ b/Perl/shared/zstd/common/mem.h @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #ifndef MEM_H_MODULE @@ -38,8 +39,12 @@ extern "C" { # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ #endif +#ifndef __has_builtin +# define __has_builtin(x) 0 /* compat. with non-clang compilers */ +#endif + /* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } +#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } @@ -48,23 +53,33 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; - typedef intptr_t iPtrDiff; + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; #else +# include +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif typedef unsigned short U16; typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif typedef unsigned int U32; typedef signed int S32; +/* note : there are no limits defined for long long type in C90. + * limits exist in C99, however, in such case, is preferred */ typedef unsigned long long U64; typedef signed long long S64; - typedef ptrdiff_t iPtrDiff; #endif @@ -76,19 +91,18 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -109,7 +123,7 @@ Only use if no other choice to achieve best performance on target platform */ MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } -MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } @@ -120,21 +134,27 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; __pragma( pack(pop) ) #else - typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; #endif -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } -MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } #else @@ -182,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ return _byteswap_ulong(in); -#elif defined (__GNUC__) +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) return __builtin_bswap32(in); #else return ((in << 24) & 0xff000000 ) | @@ -196,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in) { #if defined(_MSC_VER) /* Visual Studio */ return _byteswap_uint64(in); -#elif defined (__GNUC__) +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) return __builtin_bswap64(in); #else return ((in << 56) & 0xff00000000000000ULL) | @@ -351,20 +373,6 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) } -/* function safe only for comparisons */ -MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; - } -} - #if defined (__cplusplus) } #endif diff --git a/Perl/shared/zstd/common/pool.c b/Perl/shared/zstd/common/pool.c new file mode 100644 index 000000000..7a8294543 --- /dev/null +++ b/Perl/shared/zstd/common/pool.c @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +#include /* size_t */ +#include "debug.h" /* assert */ +#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. + */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. +*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_free(ctx->queue, ctx->customMem); + ZSTD_free(ctx->threads, ctx->customMem); + ZSTD_free(ctx, ctx->customMem); +} + + + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_free(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_ctx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_ctx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_ctx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_ctx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/Perl/shared/zstd/common/pool.h b/Perl/shared/zstd/common/pool.h new file mode 100644 index 000000000..458d37f13 --- /dev/null +++ b/Perl/shared/zstd/common/pool.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +#include "zstd.h" + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/Perl/shared/zstd/common/threading.c b/Perl/shared/zstd/common/threading.c new file mode 100644 index 000000000..f3d4fa841 --- /dev/null +++ b/Perl/shared/zstd/common/threading.c @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useless_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include +#include +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/Perl/shared/zstd/common/threading.h b/Perl/shared/zstd/common/threading.h new file mode 100644 index 000000000..d806c89d0 --- /dev/null +++ b/Perl/shared/zstd/common/threading.h @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/Perl/shared/zstd/common/xxhash.c b/Perl/shared/zstd/common/xxhash.c index 29e4fa628..30599aaae 100644 --- a/Perl/shared/zstd/common/xxhash.c +++ b/Perl/shared/zstd/common/xxhash.c @@ -66,10 +66,10 @@ /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ /*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * Should endian-independence be of no importance for your application, you may set the #define below to 1, * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ @@ -98,32 +98,41 @@ /* Modify the local functions below should you wish to use some other memory routines */ /* for malloc(), free() */ #include +#include /* size_t */ static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } /* for memcpy() */ #include static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } -#define XXH_STATIC_LINKING_ONLY +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif #include "xxhash.h" /* ************************************* * Compiler Specific Options ***************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# define FORCE_INLINE static __forceinline +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline #else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR + + +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif @@ -246,7 +255,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); @@ -254,7 +263,7 @@ FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_a return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } -FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } @@ -264,7 +273,7 @@ static U32 XXH_readBE32(const void* ptr) return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); @@ -272,7 +281,7 @@ FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_a return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } @@ -333,7 +342,7 @@ static U32 XXH32_round(U32 seed, U32 input) return seed; } -FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; @@ -433,7 +442,7 @@ static U64 XXH64_mergeRound(U64 acc, U64 val) return acc; } -FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -582,7 +591,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long } -FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -652,7 +661,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* -FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem32; const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; @@ -702,7 +711,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) /* **** XXH64 **** */ -FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -769,7 +778,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem64; const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; diff --git a/Perl/shared/zstd/common/xxhash.h b/Perl/shared/zstd/common/xxhash.h index 2c9b7c61b..9bad1f59f 100644 --- a/Perl/shared/zstd/common/xxhash.h +++ b/Perl/shared/zstd/common/xxhash.h @@ -64,16 +64,12 @@ XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - #if defined (__cplusplus) extern "C" { #endif -#ifndef XXH_NAMESPACE -# define XXH_NAMESPACE ZSTD_ /* Zstandard specific */ -#endif +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 /* **************************** @@ -242,6 +238,11 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH /* ************************** * Canonical representation ****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; typedef struct { unsigned char digest[8]; } XXH64_canonical_t; @@ -251,14 +252,9 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ +#endif /* XXHASH_H_5627135585666179 */ -#ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ This section contains definitions which are not guaranteed to remain stable. @@ -266,6 +262,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src They shall only be used with static linking. Never use these definitions in association with dynamic linking ! =================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 /* These definitions are only meant to allow allocation of XXH state statically, on stack, or in a struct for example. @@ -299,11 +297,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src # include "xxhash.c" /* include xxhash functions as `static`, for inlining */ # endif -#endif /* XXH_STATIC_LINKING_ONLY */ +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ #if defined (__cplusplus) } #endif - -#endif /* XXHASH_H_5627135585666179 */ diff --git a/Perl/shared/zstd/common/zstd_common.c b/Perl/shared/zstd/common/zstd_common.c index f30128c79..667f4a27f 100644 --- a/Perl/shared/zstd/common/zstd_common.c +++ b/Perl/shared/zstd/common/zstd_common.c @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ @@ -12,66 +13,71 @@ /*-************************************* * Dependencies ***************************************/ -#include /* malloc */ +#include /* malloc, calloc, free */ +#include /* memset */ #include "error_private.h" -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ +#include "zstd_internal.h" /*-**************************************** * Version ******************************************/ -unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } /*-**************************************** * ZSTD Error Management ******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ /*! ZSTD_isError() : -* tells if a return value is an error code */ + * tells if a return value is an error code + * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } /*! ZSTD_getErrorName() : -* provides error code string from function result (useful for debugging) */ + * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /*! ZSTD_getError() : -* convert a `size_t` function result into a proper ZSTD_errorCode enum */ + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } /*! ZSTD_getErrorString() : -* provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); } + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -/* --- ZBUFF Error Management (deprecated) --- */ -unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } -const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } /*=************************************************************** * Custom allocator ****************************************************************/ -/* default uses stdlib */ -void* ZSTD_defaultAllocFunction(void* opaque, size_t size) -{ - void* address = malloc(size); - (void)opaque; - return address; -} - -void ZSTD_defaultFreeFunction(void* opaque, void* address) +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) { - (void)opaque; - free(address); + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return malloc(size); } -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) { - return customMem.customAlloc(customMem.opaque, size); + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + memset(ptr, 0, size); + return ptr; + } + return calloc(1, size); } void ZSTD_free(void* ptr, ZSTD_customMem customMem) { - if (ptr!=NULL) - customMem.customFree(customMem.opaque, ptr); + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + free(ptr); + } } diff --git a/Perl/shared/zstd/common/zstd_errors.h b/Perl/shared/zstd/common/zstd_errors.h index 50dc4f720..92a343389 100644 --- a/Perl/shared/zstd/common/zstd_errors.h +++ b/Perl/shared/zstd/common/zstd_errors.h @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #ifndef ZSTD_ERRORS_H_398273423 @@ -18,39 +19,71 @@ extern "C" { #include /* size_t */ -/*-**************************************** -* error codes list -******************************************/ +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_maxCode + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; /*! ZSTD_getErrorCode() : convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare directly with enum list published into "error_public.h" */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); -const char* ZSTD_getErrorString(ZSTD_ErrorCode code); + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ #if defined (__cplusplus) diff --git a/Perl/shared/zstd/common/zstd_internal.h b/Perl/shared/zstd/common/zstd_internal.h index 96e057758..31f756ab5 100644 --- a/Perl/shared/zstd/common/zstd_internal.h +++ b/Perl/shared/zstd/common/zstd_internal.h @@ -1,75 +1,111 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #ifndef ZSTD_CCOMMON_H_MODULE #define ZSTD_CCOMMON_H_MODULE -/*-******************************************************* -* Compiler specifics -*********************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# ifdef __GNUC__ -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ /*-************************************* * Dependencies ***************************************/ +#include "compiler.h" #include "mem.h" +#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +#include "xxhash.h" /* XXH_reset, update, digest */ + + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError /*-************************************* * shared macros ***************************************/ +#undef MIN +#undef MAX #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) -#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ -#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. In order to do that + * (particularly, printing the conditional that failed), this can't just wrap + * RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) \ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); /*-************************************* * Common constants ***************************************/ #define ZSTD_OPT_NUM (1<<12) -#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ #define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) @@ -87,6 +123,8 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; @@ -100,40 +138,53 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define LONGNBSEQ 0x7F00 #define MINMATCH 3 -#define EQUAL_READ32 4 #define Litbits 8 #define MaxLit ((1<= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); + return 31 - __builtin_clz(val); # else /* Software version */ - static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - int r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; - return r; + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; # endif + } } +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/Perl/shared/zstd/compress/fse_compress.c b/Perl/shared/zstd/compress/fse_compress.c index 6627facfe..68b47e109 100644 --- a/Perl/shared/zstd/compress/fse_compress.c +++ b/Perl/shared/zstd/compress/fse_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** FSE : Finite State Entropy encoder - Copyright (C) 2013-2015, Yann Collet. + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -32,42 +32,25 @@ - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - /* ************************************************************** * Includes ****************************************************************/ #include /* malloc, free, qsort */ #include /* memcpy, memset */ -#include /* printf (debug) */ +#include "compiler.h" +#include "mem.h" /* U32, U16, etc. */ +#include "debug.h" /* assert, DEBUGLOG */ +#include "hist.h" /* HIST_count_wksp */ #include "bitstream.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" +#include "error_private.h" /* ************************************************************** * Error Management ****************************************************************/ -#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define FSE_isError ERR_isError /* ************************************************************** @@ -100,7 +83,9 @@ * wkspSize should be sized to handle worst case situation, which is `1< wkspSize) return ERROR(tableLog_tooLarge); tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif /* symbol start positions */ { U32 u; cumul[0] = 0; - for (u=1; u<=maxSymbolValue+1; u++) { + for (u=1; u <= maxSymbolValue+1; u++) { if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ cumul[u] = cumul[u-1] + 1; tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); @@ -139,14 +129,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi { U32 position = 0; U32 symbol; for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurences; - for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ + while (position > highThreshold) + position = (position + step) & tableMask; /* Low proba area */ } } - if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + assert(position==0); /* Must have initialized all positions */ } /* Build table */ @@ -161,7 +153,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi for (s=0; s<=maxSymbolValue; s++) { switch (normalizedCounter[s]) { - case 0: break; + case 0: + /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */ + symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<1) { /* stops at 1 */ - if (previous0) { - unsigned start = charnum; - while (!normalizedCounter[charnum]) charnum++; - while (charnum >= start+24) { + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { start+=24; bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE) bitStream; out[1] = (BYTE)(bitStream>>8); out+=2; bitStream>>=16; } - while (charnum >= start+3) { + while (symbol >= start+3) { start+=3; bitStream += 3 << bitCount; bitCount += 2; } - bitStream += (charnum-start) << bitCount; + bitStream += (symbol-start) << bitCount; bitCount += 2; if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; bitStream >>= 16; bitCount -= 16; } } - { short count = normalizedCounter[charnum++]; - const short max = (short)((2*threshold-1)-remaining); - remaining -= FSE_abs(count); - if (remaining<1) return ERROR(GENERIC); + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; count++; /* +1 for extra accuracy */ - if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ bitStream += count << bitCount; bitCount += nbBits; bitCount -= (count>=1; + previousIs0 = (count==1); + if (remaining<1) return ERROR(GENERIC); + while (remaining>=1; } } if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; @@ -279,191 +290,37 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitCount -= 16; } } + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out+= (bitCount+7) /8; - if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); - return (out-ostart); } -size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { - if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); -} - - - -/*-************************************************************** -* Counting histogram -****************************************************************/ -/*! FSE_count_simple - This function counts byte values within `src`, and store the histogram into table `count`. - It doesn't use any additional memory. - But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. - For this reason, prefer using a table `count` with 256 elements. - @return : count of most numerous element -*/ -size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - const BYTE* const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - - memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); - if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } - - while (ip max) max = count[s]; } - - return (size_t)max; -} - - -/* FSE_count_parallel_wksp() : - * Same as FSE_count_parallel(), but using an externally provided scratch buffer. - * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ -static size_t FSE_count_parallel_wksp( - unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - unsigned checkMax, unsigned* const workSpace) -{ - const BYTE* ip = (const BYTE*)source; - const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - U32* const Counting1 = workSpace; - U32* const Counting2 = Counting1 + 256; - U32* const Counting3 = Counting2 + 256; - U32* const Counting4 = Counting3 + 256; - - memset(Counting1, 0, 4*256*sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - - /* finish last symbols */ - while (ipmaxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } - - { U32 s; for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } } - - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace) -{ - if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); -} - -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[1024]; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); } -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace) -{ - if (*maxSymbolValuePtr < 255) - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); - *maxSymbolValuePtr = 255; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); -} - -size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - unsigned tmpCounters[1024]; - return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); -} - - /*-************************************************************** * FSE Compression Code ****************************************************************/ -/*! FSE_sizeof_CTable() : - FSE_CTable is a variable size structure which contains : - `U16 tableLog;` - `U16 maxSymbolValue;` - `U16 nextStateNumber[1 << tableLog];` // This size is variable - `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable -Allocation is manual (C standard does not support variable-size structures). -*/ -size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); -} FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) { @@ -478,20 +335,22 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); } /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { - U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - return minBits; + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; } unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; return tableLog; @@ -508,6 +367,7 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) { + short const NOT_YET_ASSIGNED = -2; U32 s; U32 distributed = 0; U32 ToDistribute; @@ -533,15 +393,19 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, total -= count[s]; continue; } - norm[s]=-2; + + norm[s]=NOT_YET_ASSIGNED; } ToDistribute = (1 << tableLog) - distributed; + if (ToDistribute == 0) + return 0; + if ((total / ToDistribute) > lowOne) { /* risk of rounding to zero */ lowOne = (U32)((total * 3) / (ToDistribute * 2)); for (s=0; s<=maxSymbolValue; s++) { - if ((norm[s] == -2) && (count[s] <= lowOne)) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { norm[s] = 1; distributed++; total -= count[s]; @@ -556,17 +420,24 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, find max, then give all remaining points to max */ U32 maxV = 0, maxC = 0; for (s=0; s<=maxSymbolValue; s++) - if (count[s] > maxC) maxV=s, maxC=count[s]; + if (count[s] > maxC) { maxV=s; maxC=count[s]; } norm[maxV] += (short)ToDistribute; return 0; } + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + { U64 const vStepLog = 62 - tableLog; U64 const mid = (1ULL << (vStepLog-1)) - 1; U64 const rStep = ((((U64)1<> vStepLog); U32 const sEnd = (U32)(end >> vStepLog); @@ -591,7 +462,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ U64 const vStep = 1ULL<<(scale-20); @@ -613,7 +484,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, U64 restToBeat = vStep * rtbTable[proba]; proba += (count[s]*step) - ((U64)proba< restToBeat; } - if (proba > largestP) largestP=proba, largest=s; + if (proba > largestP) { largestP=proba; largest=s; } normalizedCounter[s] = proba; stillToDistribute -= proba; } } @@ -630,11 +501,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, U32 s; U32 nTotal = 0; for (s=0; s<=maxSymbolValue; s++) - printf("%3i: %4i \n", s, normalizedCounter[s]); + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); for (s=0; s<=maxSymbolValue; s++) nTotal += abs(normalizedCounter[s]); if (nTotal != (1U< not compressible */ if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ @@ -836,7 +707,7 @@ typedef struct { size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) { fseWkspMax_t scratchBuffer; - FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } diff --git a/Perl/shared/zstd/compress/hist.c b/Perl/shared/zstd/compress/hist.c new file mode 100644 index 000000000..45b7babc1 --- /dev/null +++ b/Perl/shared/zstd/compress/hist.c @@ -0,0 +1,203 @@ +/* ****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include "mem.h" /* U32, BYTE, etc. */ +#include "debug.h" /* assert, DEBUGLOG */ +#include "error_private.h" /* ERROR */ +#include "hist.h" + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. + * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ipmaxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; + if (maxSymbolValue > 255) maxSymbolValue = 255; + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} diff --git a/Perl/shared/zstd/compress/hist.h b/Perl/shared/zstd/compress/hist.h new file mode 100644 index 000000000..8b389358d --- /dev/null +++ b/Perl/shared/zstd/compress/hist.h @@ -0,0 +1,95 @@ +/* ****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/Perl/shared/zstd/compress/huf_compress.c b/Perl/shared/zstd/compress/huf_compress.c index bf464daaf..f074f1e0a 100644 --- a/Perl/shared/zstd/compress/huf_compress.c +++ b/Perl/shared/zstd/compress/huf_compress.c @@ -45,18 +45,22 @@ ****************************************************************/ #include /* memcpy, memset */ #include /* printf (debug) */ +#include "compiler.h" #include "bitstream.h" +#include "hist.h" #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ #include "fse.h" /* header compression */ #define HUF_STATIC_LINKING_ONLY #include "huf.h" +#include "error_private.h" /* ************************************************************** * Error Management ****************************************************************/ -#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } @@ -78,28 +82,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. */ #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) { BYTE* const ostart = (BYTE*) dst; BYTE* op = ostart; BYTE* const oend = ostart + dstSize; - U32 maxSymbolValue = HUF_TABLELOG_MAX; + unsigned maxSymbolValue = HUF_TABLELOG_MAX; U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; BYTE scratchBuffer[1< not compressible */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ } tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); @@ -127,10 +131,10 @@ struct HUF_CElt_s { }; /* typedef'd to HUF_CElt within "huf.h" */ /*! HUF_writeCTable() : - `CTable` : huffman tree to save, using huf representation. + `CTable` : Huffman tree to save, using huf representation. @return : size of saved CTable */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) { BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; @@ -165,7 +169,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, } -size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) { BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ @@ -177,7 +181,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si /* check result */ if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); /* Prepare base value per rank */ { U32 n, nextRankStart = 0; @@ -206,12 +210,20 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si min >>= 1; } } /* assign value within rank, symbol order */ - { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } + { U32 n; for (n=0; n find closest one (note : there is necessarily at least one !) */ - while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) nBitsToDecrease ++; totalCost -= 1 << (nBitsToDecrease-1); if (rankLast[nBitsToDecrease-1] == noSymbol) @@ -302,7 +315,7 @@ typedef struct { U32 current; } rankPos; -static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) { rankPos rank[32]; U32 n; @@ -318,7 +331,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) U32 const c = count[n]; U32 const r = BIT_highbit32(c+1) + 1; U32 pos = rank[r].current++; - while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } huffNode[pos].count = c; huffNode[pos].byte = (BYTE)n; } @@ -327,11 +343,11 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. */ #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) -typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1]; -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) { nodeElt* const huffNode0 = (nodeElt*)workSpace; nodeElt* const huffNode = huffNode0+1; @@ -341,9 +357,10 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu U32 nodeRoot; /* safety checks */ - if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); memset(huffNode0, 0, sizeof(huffNodeTable)); /* sort, decreasing order */ @@ -401,22 +418,43 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu } /** HUF_buildCTable() : + * @return : maxNbBits * Note : count is used before tree is written, so they can safely overlap */ -size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) { huffNodeTable nodeTable; return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); } -static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; } size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } -#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) #define HUF_FLUSHBITS_1(stream) \ if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) @@ -424,14 +462,16 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } #define HUF_FLUSHBITS_2(stream) \ if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) { const BYTE* ip = (const BYTE*) src; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; size_t n; - const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); BIT_CStream_t bitC; /* init */ @@ -444,12 +484,15 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si { case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); HUF_FLUSHBITS_2(&bitC); + /* fall-through */ case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); HUF_FLUSHBITS_1(&bitC); + /* fall-through */ case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); HUF_FLUSHBITS(&bitC); - case 0 : - default: ; + /* fall-through */ + case 0 : /* fall-through */ + default: break; } for (; n>0; n-=4) { /* note : n&3==0 at this stage */ @@ -466,8 +509,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si return BIT_closeCStream(&bitC); } +#if DYNAMIC_BMI2 -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) { size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ const BYTE* ip = (const BYTE*) src; @@ -480,28 +573,31 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si if (srcSize < 12) return 0; /* no saving possible : too small input */ op += 6; /* jumpTable */ - { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; + assert(cSize <= 65535); MEM_writeLE16(ostart, (U16)cSize); op += cSize; } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; + assert(cSize <= 65535); MEM_writeLE16(ostart+2, (U16)cSize); op += cSize; } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; + assert(cSize <= 65535); MEM_writeLE16(ostart+4, (U16)cSize); op += cSize; } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); if (cSize==0) return 0; op += cSize; } @@ -509,65 +605,122 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si return op-ostart; } +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} -/* `workSpace` must a table of at least 1024 unsigned */ -static size_t HUF_compress_internal ( - void* dst, size_t dstSize, +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - unsigned singleStream, - void* workSpace, size_t wkspSize) + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? + HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return op-ostart; +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + huffNodeTable nodeTable; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) { + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - union { - U32 count[HUF_SYMBOLVALUE_MAX+1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; - } table; /* `count` can overlap with `CTable`; saves 1 KB */ - /* checks & inits */ - if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); - if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ - if (!dstSize) return 0; /* cannot fit within dst budget */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + /* Scan input and build symbol stats */ - { CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) ); + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); } /* Build Huffman Tree */ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) ); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + table->nodeTable, sizeof(table->nodeTable)); + CHECK_F(maxBits); huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); } /* Write table description header */ - { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) ); - if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ - op += hSize; - } + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } - /* Compress */ - { size_t const cSize = (singleStream) ? - HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) : /* single segment */ - HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable); - if (HUF_isError(cSize)) return cSize; - if (cSize==0) return 0; /* uncompressible */ - op += cSize; + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ } - - /* check compressibility */ - if ((size_t)(op-ostart) >= srcSize-1) - return 0; - - return op-ostart; + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); } @@ -576,34 +729,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize) { - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize); + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); } size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog) { - unsigned workSpace[1024]; + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); } +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize) { - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize); + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); } size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog) { - unsigned workSpace[1024]; + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); } size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); } diff --git a/Perl/shared/zstd/compress/zstd_compress.c b/Perl/shared/zstd/compress/zstd_compress.c index 3202dde21..2e163c8bf 100644 --- a/Perl/shared/zstd/compress/zstd_compress.c +++ b/Perl/shared/zstd/compress/zstd_compress.c @@ -1,2266 +1,2839 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ - /*-************************************* * Dependencies ***************************************/ +#include /* INT_MAX */ #include /* memset */ +#include "cpu.h" #include "mem.h" -#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ -#include "xxhash.h" /* XXH_reset, update, digest */ +#include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ #include "fse.h" #define HUF_STATIC_LINKING_ONLY #include "huf.h" -#include "zstd_internal.h" /* includes zstd.h */ - - -/*-************************************* -* Constants -***************************************/ -static const U32 g_searchStrength = 8; /* control skip over incompressible data */ -#define HASH_READ_SIZE 8 -typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +#include "zstd_compress_internal.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" +#include "zstd_ldm.h" /*-************************************* * Helper functions ***************************************/ -#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; } -size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } - - -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); } /*-************************************* * Context memory management ***************************************/ -struct ZSTD_CCtx_s -{ - const BYTE* nextSrc; /* next block here to continue on current prefix */ - const BYTE* base; /* All regular indexes relative to this position */ - const BYTE* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 nextToUpdate3; /* index from which to continue dictionary update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; - ZSTD_compressionStage_e stage; - U32 rep[ZSTD_REP_NUM]; - U32 savedRep[ZSTD_REP_NUM]; - U32 dictID; - ZSTD_parameters params; - void* workSpace; - size_t workSpaceSize; - size_t blockSize; - U64 frameContentSize; - XXH64_state_t xxhState; +struct ZSTD_CDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictContentSize; + void* workspace; + size_t workspaceSize; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; - - seqStore_t seqStore; /* sequences storage ptrs */ - U32* hashTable; - U32* hashTable3; - U32* chainTable; - HUF_CElt* hufTable; - U32 flagStaticTables; - FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - unsigned tmpCounters[1024]; -}; + U32 dictID; +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) { - return ZSTD_createCCtx_advanced(defaultCustomMem); + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); } -ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) { - ZSTD_CCtx* cctx; + assert(cctx != NULL); + memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} - cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) return NULL; - memset(cctx, 0, sizeof(ZSTD_CCtx)); - memcpy(&(cctx->customMem), &customMem, sizeof(customMem)); +ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ + cctx->staticSize = workspaceSize; + cctx->workSpace = (void*)(cctx+1); + cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; + assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; + cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; + { + void* const ptr = cctx->blockState.nextCBlock + 1; + cctx->entropyWorkspace = (U32*)ptr; + } + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); return cctx; } -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) { - if (cctx==NULL) return 0; /* support free on NULL */ - ZSTD_free(cctx->workSpace, cctx->customMem); - ZSTD_free(cctx, cctx->customMem); - return 0; /* reserved as a potential error code in the future */ + ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; } -size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) { - if (cctx==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*cctx) + cctx->workSpaceSize; + size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; } -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) { - return &(ctx->seqStore); + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif } -static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) { - return cctx->params; + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + ZSTD_freeCCtxContent(cctx); + ZSTD_free(cctx, cctx->customMem); + return 0; } -/** ZSTD_checkParams() : - ensure param values remain within authorized range. - @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) { -# define CLAMPCHECK(val,min,max) { if ((valmax)) return ERROR(compressionParameter_unsupported); } - CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN; - U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; - CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); } - CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported); +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; return 0; +#endif } -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) { - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; + if (cctx==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*cctx) + cctx->workSpaceSize + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); } -/** ZSTD_adjustCParams() : - optimize `cPar` for a given input (`srcSize` and `dictSize`). - mostly downsizing to reduce memory consumption and initialization. - Both `srcSize` and `dictSize` are optional (use 0 if unknown), - but if both are 0, no optimization can be done. - Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ -ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) { - if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */ - - /* resize params, to use less memory when necessary */ - { U32 const minSrcSize = (srcSize==0) ? 500 : 0; - U64 const rSize = srcSize + dictSize + minSrcSize; - if (rSize < ((U64)1< srcLog) cPar.windowLog = srcLog; - } } - if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; - { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog); - } - - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - - return cPar; + return ZSTD_sizeof_CCtx(zcs); /* same object */ } +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } -size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) { - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog); - U32 const divider = (cParams.searchLength==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11*maxNbSeq; - - size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog); - size_t const hSize = ((size_t)1) << cParams.hashLog; - U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - - size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<customMem = customMem; + params->compressionLevel = ZSTD_CLEVEL_DEFAULT; + params->fParams.contentSizeFlag = 1; + return params; +} -static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2) +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) { - return (param1.cParams.hashLog == param2.cParams.hashLog) - & (param1.cParams.chainLog == param2.cParams.chainLog) - & (param1.cParams.strategy == param2.cParams.strategy) - & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3)); + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); } -/*! ZSTD_continueCCtx() : - reuse CCtx without reset (note : requires no dictionary) */ -static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize) -{ - U32 const end = (U32)(cctx->nextSrc - cctx->base); - cctx->params = params; - cctx->frameContentSize = frameContentSize; - cctx->lowLimit = end; - cctx->dictLimit = end; - cctx->nextToUpdate = end+1; - cctx->stage = ZSTDcs_init; - cctx->dictID = 0; - cctx->loadedDictEnd = 0; - { int i; for (i=0; irep[i] = repStartValue[i]; } - cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */ - XXH64_reset(&cctx->xxhState, 0); +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_free(params, params->customMem); return 0; } -typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e; - -/*! ZSTD_resetCCtx_advanced() : - note : 'params' must be validated */ -static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, - ZSTD_parameters params, U64 frameContentSize, - ZSTD_compResetPolicy_e const crp) +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) { - if (crp == ZSTDcrp_continue) - if (ZSTD_equivalentParams(params, zc->params)) - return ZSTD_continueCCtx(zc, params, frameContentSize); - - { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog); - U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11*maxNbSeq; - size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); - size_t const hSize = ((size_t)1) << params.cParams.hashLog; - U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - void* ptr; - - /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { - ZSTD_free(zc->workSpace, zc->customMem); - zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - if (zc->workSpace == NULL) return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; - } } - - if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace); /* reset tables only */ - XXH64_reset(&zc->xxhState, 0); - zc->hashLog3 = hashLog3; - zc->hashTable = (U32*)(zc->workSpace); - zc->chainTable = zc->hashTable + hSize; - zc->hashTable3 = zc->chainTable + chainSize; - ptr = zc->hashTable3 + h3Size; - zc->hufTable = (HUF_CElt*)ptr; - zc->flagStaticTables = 0; - ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */ - - zc->nextToUpdate = 1; - zc->nextSrc = NULL; - zc->base = NULL; - zc->dictBase = NULL; - zc->dictLimit = 0; - zc->lowLimit = 0; - zc->params = params; - zc->blockSize = blockSize; - zc->frameContentSize = frameContentSize; - { int i; for (i=0; irep[i] = repStartValue[i]; } - - if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { - zc->seqStore.litFreq = (U32*)ptr; - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); - ptr = zc->seqStore.offCodeFreq + (MaxOff+1); - zc->seqStore.matchTable = (ZSTD_match_t*)ptr; - ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1; - zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr; - ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; - zc->seqStore.litLengthSum = 0; - } - zc->seqStore.sequencesStart = (seqDef*)ptr; - ptr = zc->seqStore.sequencesStart + maxNbSeq; - zc->seqStore.llCode = (BYTE*) ptr; - zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; - zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} - zc->stage = ZSTDcs_init; - zc->dictID = 0; - zc->loadedDictEnd = 0; +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC); + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} - return 0; - } +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params.cParams; + cctxParams->fParams = params.fParams; + cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return 0; } +/* ZSTD_assignParamsToCCtxParams() : + * params is presumed valid at this stage */ +static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( + ZSTD_CCtx_params cctxParams, ZSTD_parameters params) +{ + ZSTD_CCtx_params ret = cctxParams; + ret.cParams = params.cParams; + ret.fParams = params.fParams; + ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return ret; +} -/*! ZSTD_copyCCtx() : -* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. -* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). -* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) { - if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + ZSTD_bounds bounds = { 0, 0, 0 }; - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset); + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; - /* copy tables */ - { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); - size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; - size_t const h3Size = (size_t)1 << srcCCtx->hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); - } + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_overlapLog: + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; - /* copy dictionary offsets */ - dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; - dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; - dstCCtx->nextSrc = srcCCtx->nextSrc; - dstCCtx->base = srcCCtx->base; - dstCCtx->dictBase = srcCCtx->dictBase; - dstCCtx->dictLimit = srcCCtx->dictLimit; - dstCCtx->lowLimit = srcCCtx->lowLimit; - dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; - dstCCtx->dictID = srcCCtx->dictID; - - /* copy entropy tables */ - dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - if (srcCCtx->flagStaticTables) { - memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); - memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); - memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); - memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); + default: + { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; + return boundError; + } } - - return 0; } - -/*! ZSTD_reduceTable() : -* reduce table indexes by `reducerValue` */ -static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) { - U32 u; - for (u=0 ; u < size ; u++) { - if (table[u] < reducerValue) table[u] = 0; - else table[u] -= reducerValue; - } + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; } -/*! ZSTD_reduceIndex() : -* rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. + */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) { - { U32 const hSize = 1 << zc->params.cParams.hashLog; - ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } - - { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); - ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } - - { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; - ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; } - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* See doc/zstd_compression_format.md for detailed format description */ - -size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); - MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); - return ZSTD_blockHeaderSize+srcSize; +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound); \ } -static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) { - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); - - switch(flSize) + switch(param) { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - default: /*note : should not be necessary : flSize is within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + default: + return 0; } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; } -static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) { - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong); + } } - switch(flSize) + switch(param) { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - } - - ostart[flSize] = *(const BYTE*)src; - return flSize+1; -} + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + break; -static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + default: RETURN_ERROR(parameter_unsupported); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} -static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) { - size_t const minGain = ZSTD_minGain(srcSize); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + if (value) { /* 0 : does not change current level */ + CCtxParams->compressionLevel = value; + } + if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } - /* small ? don't even attempt compression (speed opt) */ -# define LITERAL_NOENTROPY 63 - { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = value; + return value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; } - if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - if (zc->flagStaticTables && (lhSize==3)) { - hType = set_repeat; - singleStream = 1; - cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); - } else { - cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters)) - : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters)); + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; } - if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - if (cLitSize==1) - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - default: /* should not be necessary, lhSize is only {3,4,5} */ - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - } - return lhSize+cLitSize; -} + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif -static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif -static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) { - BYTE const LL_deltaCode = 19; - BYTE const ML_deltaCode = 36; - const seqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; - BYTE* const ofCodeTable = seqStorePtr->ofCode; - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - for (u=0; u 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; - ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); - mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; - } - if (seqStorePtr->longLengthID==1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); } +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} -size_t ZSTD_compressSequences(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - size_t srcSize) -{ - const seqStore_t* seqStorePtr = &(zc->seqStore); - U32 count[MaxSeq+1]; - S16 norm[MaxSeq+1]; - FSE_CTable* CTable_LitLength = zc->litlengthCTable; - FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; - FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef* const sequences = seqStorePtr->sequencesStart; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* seqHead; - BYTE scratchBuffer[1<mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. + */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->cdict, stage_wrong); - /* Compress literals */ - { const BYTE* const literals = seqStorePtr->litStart; - size_t const litSize = seqStorePtr->lit - literals; - size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); - if (ZSTD_isError(cSize)) return cSize; - op += cSize; + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, 0, dl->dictSize); + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. */ + return 0; } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + cParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation); + cctx->cdict = dl->cdict; + return 0; +} - /* Sequences Header */ - if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) goto _check_compressibility; +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation); + memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} - /* seqHead : flags for FSE encoding type */ - seqHead = op++; +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} -#define MIN_SEQ_FOR_DYNAMIC_FSE 64 -#define MAX_SEQ_FOR_STATIC_FSE 1000 +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* CTable for Literal Lengths */ - { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { - FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); - LLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); - LLtype = set_compressed; - } } +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} - /* CTable for Offsets */ - { U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = ofCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { - FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); - Offtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); - Offtype = set_compressed; - } } +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} - /* CTable for MatchLengths */ - { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlCodeTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { - FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); - MLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; } - FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); - MLtype = set_compressed; - } } +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + ZSTD_clearAllDicts(cctx); + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + return 0; +} - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - zc->flagStaticTables = 0; - - /* Encoding Sequences */ - { BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - BIT_flushBits(&blockStream); /* (7)*/ - } } +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} - FSE_flushCState(&blockStream, &stateMatchLength); - FSE_flushCState(&blockStream, &stateOffsetBits); - FSE_flushCState(&blockStream, &stateLitLength); - { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; - } } +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} - /* check compressibility */ -_check_compressibility: - { size_t const minGain = ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; - if ((size_t)(op-ostart) >= maxCSize) return 0; } +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} - /* confirm repcodes */ - { int i; for (i=0; irep[i] = zc->savedRep[i]; } +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} - return op - ostart; +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + static const U64 minSrcSize = 513; /* (1<<9) + 1 */ + static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) + srcSize = minSrcSize; /* presumed small when there is a dictionary */ + else if (srcSize == 0) + srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; + { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cycleLog > cPar.windowLog) + cPar.chainLog -= (cycleLog - cPar.windowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; } +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); +} -/*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. - `matchCode` : matchLength - MINMATCH -*/ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) -{ -#if 0 /* for debug */ - static const BYTE* g_start = NULL; - const U32 pos = (U32)((const BYTE*)literals - g_start); - if (g_start==NULL) g_start = (const BYTE*)literals; - /*if ((pos > 1) && (pos < 50000)) */ - printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); -#endif - /* copy Literals */ - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); - seqStorePtr->lit += litLength; +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; + if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; + if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; + if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; + if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; + if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; + if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + assert(!ZSTD_checkCParams(cParams)); + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); +} - /* literal Length */ - if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } - seqStorePtr->sequences[0].litLength = (U16)litLength; +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<strategy >= ZSTD_btopt)) + ? optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} - /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, 0, 0); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; + size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); + size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); - /* match Length */ - if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } - seqStorePtr->sequences[0].matchLength = (U16)matchCode; + size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + + matchStateSize + ldmSpace + ldmSeqSpace; - seqStorePtr->sequences++; + DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); + DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); + return sizeof(ZSTD_CCtx) + neededSpace; + } } +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); +} -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (register size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCCtxSize_usingCParams(cParams); } +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, 0, 0); + size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; + size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; + size_t const streamingSize = inBuffSize + outBuffSize; + + return CCtxSize + streamingSize; + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); +} -static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - const BYTE* const pStart = pIn; - const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; } - if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pInappliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). + */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ } -/*-************************************* -* Hashes -***************************************/ -static const U32 prime3bytes = 506832829U; -static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } +static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + return (cParams1.hashLog == cParams2.hashLog) + & (cParams1.chainLog == cParams2.chainLog) + & (cParams1.strategy == cParams2.strategy) /* opt parser space */ + & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */ +} -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } +/** The parameters are equivalent if ldm is not enabled in both sets or + * all the parameters are equivalent. */ +static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, + ldmParams_t ldmParams2) +{ + return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || + (ldmParams1.enableLdm == ldmParams2.enableLdm && + ldmParams1.hashLog == ldmParams2.hashLog && + ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && + ldmParams1.minMatchLength == ldmParams2.minMatchLength && + ldmParams1.hashRateLog == ldmParams2.hashRateLog); +} -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } +typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } +/* ZSTD_sufficientBuff() : + * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . + * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ +static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1, + size_t maxNbLit1, + ZSTD_buffered_policy_e buffPol2, + ZSTD_compressionParameters cParams2, + U64 pledgedSrcSize) +{ + size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); + size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); + size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4); + size_t const maxNbLit2 = blockSize2; + size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; + DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u", + (U32)neededBufferSize2, (U32)bufferSize1); + DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u", + (U32)maxNbSeq2, (U32)maxNbSeq1); + DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u", + (U32)maxNbLit2, (U32)maxNbLit1); + return (maxNbLit2 <= maxNbLit1) + & (maxNbSeq2 <= maxNbSeq1) + & (neededBufferSize2 <= bufferSize1); +} -static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +/** Equivalence for resetCCtx purposes */ +static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, + ZSTD_CCtx_params params2, + size_t buffSize1, + size_t maxNbSeq1, size_t maxNbLit1, + ZSTD_buffered_policy_e buffPol2, + U64 pledgedSrcSize) { - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); + DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); + if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) { + DEBUGLOG(4, "ZSTD_equivalentCParams() == 0"); + return 0; + } + if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) { + DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0"); + return 0; + } + if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2, + params2.cParams, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_sufficientBuff() == 0"); + return 0; } + return 1; } +static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} -/*-************************************* -* Fast Scan -***************************************/ -static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) { - U32* const hashTable = zc->hashTable; - U32 const hBits = zc->params.cParams.hashLog; - const BYTE* const base = zc->base; - const BYTE* ip = base + zc->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; + ZSTD_window_clear(&ms->window); - while(ip <= iend) { - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += fastHashFillStep; - } + ms->nextToUpdate = ms->window.dictLimit; + ms->nextToUpdate3 = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; } +/*! ZSTD_continueCCtx() : + * reuse CCtx without reset (note : requires no dictionary) */ +static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); + + cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ + cctx->appliedParams = params; + cctx->blockState.matchState.cParams = params.cParams; + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + cctx->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); + cctx->stage = ZSTDcs_init; + cctx->dictID = 0; + if (params.ldmParams.enableLdm) + ZSTD_window_clear(&cctx->ldmState.window); + ZSTD_referenceExternalSequences(cctx, NULL, 0); + ZSTD_invalidateMatchState(&cctx->blockState.matchState); + ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); + XXH64_reset(&cctx->xxhState, 0); + return 0; +} -FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashTable = cctx->hashTable; - U32 const hBits = cctx->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(cctx->seqStore); - const BYTE* const base = cctx->base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE* const lowest = base + lowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hBits, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - hashTable[h] = current; /* update hash table */ - - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - U32 offset; - if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; +typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } +static void* +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + void* ptr, + const ZSTD_compressionParameters* cParams, + ZSTD_compResetPolicy_e const crp, U32 const forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; - cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) + assert(((size_t)ptr & 3) == 0); + + ms->hashLog3 = hashLog3; + memset(&ms->window, 0, sizeof(ms->window)); + ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ + ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_invalidateMatchState(ms); + + /* opt parser space */ + if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ptr; + ms->opt.litLengthFreq = ms->opt.litFreq + (1<opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); + ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); + ptr = ms->opt.offCodeFreq + (MaxOff+1); + ms->opt.matchTable = (ZSTD_match_t*)ptr; + ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; + ms->opt.priceTable = (ZSTD_optimal_t*)ptr; + ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; + } + + /* table Space */ + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ + ms->hashTable = (U32*)(ptr); + ms->chainTable = ms->hashTable + hSize; + ms->hashTable3 = ms->chainTable + chainSize; + ptr = ms->hashTable3 + h3Size; + + ms->cParams = *cParams; + + assert(((size_t)ptr & 3) == 0); + return ptr; +} + +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ + +/*! ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) { - const U32 mls = ctx->params.cParams.searchLength; - switch(mls) - { - default: - case 4 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; - } -} - - -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* hashTable = ctx->hashTable; - const U32 hBits = ctx->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* repMatch = repBase + repIndex; - size_t mLength; - hashTable[h] = current; /* update hash table */ - - if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ( (matchIndex < lowestIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + if (crp == ZSTDcrp_continue) { + if (ZSTD_equivalentParams(zc->appliedParams, params, + zc->inBuffSize, + zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, + zbuff, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", + zc->appliedParams.cParams.windowLog, zc->blockSize); + zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ + if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) + return ZSTD_continueCCtx(zc, params, pledgedSrcSize); + } } + DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; + size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; + size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + void* ptr; /* used to partition workSpace */ + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const bufferSpace = buffInSize + buffOutSize; + size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); + size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); + + size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + + ldmSeqSpace + matchStateSize + tokenSpace + + bufferSpace; + + int const workSpaceTooSmall = zc->workSpaceSize < neededSpace; + int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace; + int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION); + zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0; + + DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", + neededSpace>>10, matchStateSize>>10, bufferSpace>>10); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (workSpaceTooSmall || workSpaceWasteful) { + DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", + zc->workSpaceSize >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + zc->workSpaceSize = 0; + ZSTD_free(zc->workSpace, zc->customMem); + zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); + RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); + zc->workSpaceSize = neededSpace; + zc->workSpaceOversizedDuration = 0; + + /* Statically sized space. + * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ + assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; + zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; + ptr = zc->blockState.nextCBlock + 1; + zc->entropyWorkspace = (U32*)ptr; } } - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } + /* init params */ + zc->appliedParams = params; + zc->blockState.matchState.cParams = params.cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - /* save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; + /* ldm hash table */ + /* initialize bucketOffsets table later for pointer alignment */ + if (params.ldmParams.enableLdm) { + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->ldmState.hashTable = (ldmEntry_t*)ptr; + ptr = zc->ldmState.hashTable + ldmHSize; + zc->ldmSequences = (rawSeq*)ptr; + ptr = zc->ldmSequences + maxNbLdmSeq; + zc->maxNbLdmSequences = maxNbLdmSeq; + + memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + } + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + + ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); + + /* sequences storage */ + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.sequencesStart = (seqDef*)ptr; + ptr = zc->seqStore.sequencesStart + maxNbSeq; + zc->seqStore.llCode = (BYTE*) ptr; + zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; + zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.maxNbLit = blockSize; + ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH; + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + memset(ptr, 0, ldmBucketSize); + zc->ldmState.bucketOffsets = (BYTE*)ptr; + ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + ZSTD_window_clear(&zc->ldmState.window); + } + ZSTD_referenceExternalSequences(zc, NULL, 0); + + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ptr; + zc->outBuffSize = buffOutSize; + zc->outBuff = zc->inBuff + buffInSize; + + return 0; } } +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize) { - U32 const mls = ctx->params.cParams.searchLength; - switch(mls) - { - default: - case 4 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; - } + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + return ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params.attachDictPref == ZSTD_dictForceAttach ) + && params.attachDictPref != ZSTD_dictForceCopy + && !params.forceWindow; /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ } +static size_t ZSTD_resetCCtx_byAttachingCDict( + ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); + params.cParams.windowLog = windowLog; + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_continue, zbuff); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + } -/*-************************************* -* Double Fast -***************************************/ -static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls) -{ - U32* const hashLarge = cctx->hashTable; - U32 const hBitsL = cctx->params.cParams.hashLog; - U32* const hashSmall = cctx->chainTable; - U32 const hBitsS = cctx->params.cParams.chainLog; - const BYTE* const base = cctx->base; - const BYTE* ip = base + cctx->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while(ip <= iend) { - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - - -FORCE_INLINE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashLong = cctx->hashTable; - const U32 hBitsL = cctx->params.cParams.hashLog; - U32* const hashSmall = cctx->chainTable; - const U32 hBitsS = cctx->params.cParams.chainLog; - seqStore_t* seqStorePtr = &(cctx->seqStore); - const BYTE* const base = cctx->base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE* const lowest = base + lowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 const matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */ - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + { + const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); } else { - U32 offset; - if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* match3 = base + matchIndex3; - hashLong[h3] = current + 1; - if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - mLength = ZSTD_count(ip+9, match3+8, iend) + 8; - ip++; - offset = (U32)(ip-match3); - while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - } else { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); } + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } + } - offset_2 = offset_1; - offset_1 = offset; + cctx->dictID = cdict->dictID; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } + /* copy block state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - /* match found */ - ip += mLength; - anchor = ip; + return 0; +} - if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + DEBUGLOG(4, "copying dictionary into context"); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_noMemset, zbuff); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + size_t const tableSpace = (chainSize + hSize) * sizeof(U32); + assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); + assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); + memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } - /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; - cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; + /* Zero the hashTable3, since the cdict never fills it */ + { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } + + cctx->dictID = cdict->dictID; + + /* copy block state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; } +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, params, pledgedSrcSize, zbuff); + } +} -static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - const U32 mls = ctx->params.cParams.searchLength; - switch(mls) + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); + + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. */ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_noMemset, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); + memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* copy dictionary offsets */ { - default: - case 4 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; - } -} - - -static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashLong = ctx->hashTable; - U32 const hBitsL = ctx->params.cParams.hashLog; - U32* const hashSmall = ctx->chainTable; - U32 const hBitsS = ctx->params.cParams.chainLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; - const BYTE* matchLong = matchLongBase + matchLongIndex; - - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ - - if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; - offset = current - matchLongIndex; - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; - const BYTE* match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = current + 1; - if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; - ip++; - offset = current+1 - matchIndex3; - while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; - offset = current - matchIndex; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; - } else { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } } + /* copy block state */ + memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; column= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } +static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 0); +} - /* save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; +static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 1); +} - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); } -} + if (zc->appliedParams.cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; + if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } -static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - U32 const mls = ctx->params.cParams.searchLength; - switch(mls) - { - default: - case 4 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); } } -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. -* ip : assumed <= iend-8 . -* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = zc->lowLimit; - U32 matchEndIdx = current+8; - size_t bestLength = 8; -#ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); -#endif /* ZSTD_C_PREDICT */ - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - -#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } +/*-******************************************************* +* Block entropic compression +*********************************************************/ - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } +/* See doc/zstd_compression_format.md for detailed format description */ - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ +static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall); + MEM_writeLE24(dst, cBlockHeader24); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} - if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - if (matchEndIdx > current + 8) return matchEndIdx - current - 8; - return 1; -} + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } -static size_t ZSTD_insertBtAndFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 nbCompares, const U32 mls, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - *smallerPtr = *largerPtr = 0; + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return bestLength; + ostart[flSize] = *(const BYTE*)src; + return flSize+1; } -static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) { - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) - idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; } -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) +static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + const int bmi2) { - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); -} + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); -static size_t ZSTD_BtFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); -static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } - while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); -} + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } -/** Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return lhSize+cLitSize; } -static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] -/* Update chains up to ip (excluded) - Assumption : always within prefix (ie. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) { - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - U32* const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; + return cost >> 8; } - -FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls, const U32 extDict) +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) { - U32* const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.cParams.chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 current = (U32)(ip-base); - const U32 minChain = current > chainSize ? current - chainSize : 0; - int nbAttempts=maxNbAttempts; - size_t ml=EQUAL_READ32-1; + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; - } +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} - /* save best solution */ - if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. + */ +static size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, + "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + RETURN_ERROR_IF(bitCost >= badCost, GENERIC, + "Repeat FSE_CTable has Prob[%u] == 0", s); + cost += count[s] * bitCost; } - - return ml; + return cost >> kAccuracyLog; } - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) { - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); } -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +MEM_STATIC symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) { - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base + ctx->dictLimit; - - U32 const maxSearches = 1 << ctx->params.cParams.searchLog; - U32 const mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0; - - /* init */ - ip += (ip==base); - ctx->nextToUpdate3 = ctx->nextToUpdate; - { U32 const maxRep = (U32)(ip-base); - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; - } - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - - /* check repCode */ - if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { - /* repcode : we take it */ - matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; - if (depth==0) goto _storeSequence; + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } } - - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(ml2 * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; } - - /* catch up */ - if (offset) { - while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ - { start--; matchLength++; } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; +MEM_STATIC size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); + *op = codeTable[0]; + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + return NCountSize; } + } + default: assert(0); RETURN_ERROR(GENERIC); + } +} - /* check immediate repcode */ - while ( (ip <= ilimit) - && ((offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); } } - /* Save reps for next block */ - ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset; - ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset; + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; } } - -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); } -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); } -static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +#endif + +static size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); } -static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } } - -FORCE_INLINE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) +/* ZSTD_compressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* workspace, size_t wkspSize, + const int bmi2) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); - - /* check repCode */ - { const U32 repIndex = (U32)(current+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - if (depth==0) goto _storeSequence; - } } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned count[MaxSeq+1]; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* seqHead; + BYTE* lastNCount = NULL; - if (matchLength < EQUAL_READ32) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<=1) - while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 1 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - int gain2 = (int)(repLength * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 2 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = seqStorePtr->lit - literals; + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + workspace, wkspSize, + bmi2); + FORWARD_IF_ERROR(cSize); + assert(cSize <= dstCapacity); + op += cSize; + } - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return op - ostart; + } - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } + /* seqHead : flags for FSE encoding type */ + seqHead = op++; - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - offset_2); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + count, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->fse.litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), + workspace, wkspSize); + FORWARD_IF_ERROR(countSize); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), + workspace, wkspSize); + FORWARD_IF_ERROR(countSize); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), + workspace, wkspSize); + FORWARD_IF_ERROR(countSize); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; } } - /* Save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); -} + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return op - ostart; } -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +MEM_STATIC size_t +ZSTD_compressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); -} + size_t const cSize = ZSTD_compressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + workspace, wkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. + */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); + return cSize; } - -/* The optimal parser */ -#include "zstd_opt.h" - -static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) { -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif -} + static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); -static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; } -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) { -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; } -static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +void ZSTD_resetSeqStore(seqStore_t* ssPtr) { -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; } - -typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); - -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) { - static const ZSTD_blockCompressor blockCompressor[2][8] = { - { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 }, - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict } - }; - - return blockCompressor[extDict][(U32)strat]; -} + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + size_t cSize; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + cSize = 0; + goto out; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (current > ms->nextToUpdate + 384) + ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); + } + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize)); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); - const BYTE* const base = zc->base; - const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ - ZSTD_resetSeqStore(&(zc->seqStore)); - if (current > zc->nextToUpdate + 384) - zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */ - blockCompressor(zc, src, srcSize); - return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); + /* encode sequences and literals */ + cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + +out: + if (!ZSTD_isError(cSize) && cSize != 0) { + /* confirm repcodes and entropy tables when emitting a compressed block */ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; } -/*! ZSTD_compress_generic() : +/*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. * Frame is supposed already started (header already produced) * @return : compressed size, or an error code */ -static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastFrameChunk) @@ -2270,61 +2843,60 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; - U32 const maxDist = 1 << cctx->params.cParams.windowLog; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= 31); - if (cctx->params.fParams.checksumFlag && srcSize) + DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) XXH64_update(&cctx->xxhState, src, srcSize); while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - size_t cSize; - if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; - /* preemptive overflow correction */ - if (cctx->lowLimit > (2U<<30)) { - U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1; - U32 const current = (U32)(ip - cctx->base); - U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog); - U32 const correction = current - newCurrent; - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30); + if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { + U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); ZSTD_reduceIndex(cctx, correction); - cctx->base += correction; - cctx->dictBase += correction; - cctx->lowLimit -= correction; - cctx->dictLimit -= correction; - if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0; - else cctx->nextToUpdate -= correction; - } - - if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { - /* enforce maxDist */ - U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; - if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; - if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit; + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; } + ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; + { size_t cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize); + FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); - if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); - MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ - memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); - cSize = ZSTD_blockHeaderSize+blockSize; - } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader24); - cSize += ZSTD_blockHeaderSize; - } + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize); + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } - remaining -= blockSize; - dstCapacity -= cSize; - ip += blockSize; - op += cSize; - } + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; return op-ostart; @@ -2332,27 +2904,33 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) + ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) { BYTE* const op = (BYTE*)dst; - U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ U32 const checksumFlag = params.fParams.checksumFlag>0; - U32 const windowSize = 1U << params.cParams.windowLog; - U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1)); + U32 const windowSize = (U32)1 << params.cParams.windowLog; + U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); U32 const fcsCode = params.fParams.contentSizeFlag ? - (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */ - 0; - BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); - size_t pos; - - if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); - - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = frameHeaderDecriptionByte; pos=5; + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + + if (params.format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; if (!singleSegment) op[pos++] = windowLogByte; switch(dictIDSizeCode) { - default: /* impossible */ + default: assert(0); /* impossible */ case 0 : break; case 1 : op[pos] = (BYTE)(dictID); pos++; break; case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; @@ -2360,7 +2938,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, } switch(fcsCode) { - default: /* impossible */ + default: assert(0); /* impossible */ case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; @@ -2369,119 +2947,173 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, return pos; } +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + return 0; +} + static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame, U32 lastFrameChunk) { - const BYTE* const ip = (const BYTE*) src; + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; size_t fhSize = 0; - if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (unsigned)srcSize); + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init (ZSTD_compressBegin)"); if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID); - if (ZSTD_isError(fhSize)) return fhSize; + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; dst = (char*)dst + fhSize; cctx->stage = ZSTDcs_ongoing; } - /* Check if blocks follow each other */ - if (src != cctx->nextSrc) { - /* not contiguous */ - ptrdiff_t const delta = cctx->nextSrc - ip; - cctx->lowLimit = cctx->dictLimit; - cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); - cctx->dictBase = cctx->base; - cctx->base -= delta; - cctx->nextToUpdate = cctx->dictLimit; - if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */ - } + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx; - cctx->lowLimit = lowLimitMax; + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); } - cctx->nextSrc = ip + srcSize; + if (!frame) { + /* overflow check and correction for block mode */ + if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) { + U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_reduceIndex(cctx, correction); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } + } + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); { size_t const cSize = frame ? - ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } return cSize + fhSize; } } - size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0); + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); } -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx) +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) { - return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); } size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); - if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); -} + size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} -static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) { const BYTE* const ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; - /* input becomes current prefix */ - zc->lowLimit = zc->dictLimit; - zc->dictLimit = (U32)(zc->nextSrc - zc->base); - zc->dictBase = zc->base; - zc->base += ip - zc->nextSrc; - zc->nextToUpdate = zc->dictLimit; - zc->loadedDictEnd = (U32)(iend - zc->base); + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); - zc->nextSrc = iend; if (srcSize <= HASH_READ_SIZE) return 0; - switch(zc->params.cParams.strategy) + switch(params->cParams.strategy) { case ZSTD_fast: - ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); + ZSTD_fillHashTable(ms, iend, dtlm); break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength); + ZSTD_fillDoubleHashTable(ms, iend, dtlm); break; case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength); + if (srcSize >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); break; - case ZSTD_btlazy2: + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ case ZSTD_btopt: - case ZSTD_btopt2: - ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + case ZSTD_btultra: + case ZSTD_btultra2: + if (srcSize >= HASH_READ_SIZE) + ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); break; default: - return ERROR(GENERIC); /* strategy doesn't exist; impossible */ + assert(0); /* not possible : not a valid strategy id */ } - zc->nextToUpdate = zc->loadedDictEnd; + ms->nextToUpdate = (U32)(iend - ms->window.base); return 0; } @@ -2492,120 +3124,207 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t NOTE: This behavior is not standard and could be improved in the future. */ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { U32 s; - if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); for (s = 0; s <= maxSymbolValue; ++s) { - if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); } return 0; } /* Dictionary format : - Magic == ZSTD_DICT_MAGIC (4 bytes) - HUF_writeCTable(256) - FSE_writeNCount(off) - FSE_writeNCount(ml) - FSE_writeNCount(ll) - RepOffsets - Dictionary content -*/ -/*! ZSTD_loadDictEntropyStats() : - @return : size read from dictionary - note : magic number supposed already checked */ -static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed > 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff; - BYTE scratchBuffer[1<= (1< 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictPtr += 4; /* skip magic number */ + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); + dictPtr += 4; - { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); - if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + { unsigned maxSymbolValue = 255; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); dictPtr += hufHeaderSize; } { unsigned offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - CHECK_E (FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); /* Every match length code must have non-zero probability */ - CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); - CHECK_E (FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); + FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); /* Every literal length code must have non-zero probability */ - CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); - CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); + FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += litlengthHeaderSize; } - if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); - cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); - cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); - cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; - { U32 offcodeMax = MaxOff; - if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */ - /* Calculate minimum offset code required to represent maxOffset */ - offcodeMax = ZSTD_highbit32(maxOffset); + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ } - /* Every possible supported offset <= dictContentSize + 128 KB must be representable */ - CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); - } + /* All offset values <= dictContentSize + 128 KB must be representable */ + FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + /* All repCodes must be <= dictContentSize and != 0*/ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); + } } - cctx->flagStaticTables = 1; - return dictPtr - (const BYTE*)dict; + bs->entropy.huf.repeatMode = HUF_repeat_valid; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); + return dictID; + } } /** ZSTD_compress_insertDictionary() : -* @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) { + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<=8)) return 0; - /* default : dict is pure content */ - if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); - zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4); + ZSTD_reset_compressedBlockState(bs); - /* known magic number : dict is parsed for entropy stats and content */ - { size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8); - size_t const eSize = loadError + 8; - if (ZSTD_isError(loadError)) return loadError; - return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + assert(0); /* impossible */ } -} + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); +} /*! ZSTD_compressBegin_internal() : -* @return : 0, or an error code */ + * @return : 0, or an error code */ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, U64 pledgedSrcSize) + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue; - CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp)); - return ZSTD_compress_insertDictionary(cctx, dict, dictSize); + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + if (cdict && cdict->dictContentSize>0) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_continue, zbuff) ); + { + size_t const dictID = ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID); + assert(dictID <= (size_t)(U32)-1); + cctx->dictID = (U32)dictID; + } + return 0; } +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} /*! ZSTD_compressBegin_advanced() : * @return : 0, or an error code */ @@ -2613,22 +3332,27 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) { - /* compression parameters verification and optimization */ - CHECK_F(ZSTD_checkCParams(params.cParams)); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + cctxParams, pledgedSrcSize); } - size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } - -size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) { - return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel); + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); } @@ -2641,12 +3365,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) BYTE* op = ostart; size_t fhSize = 0; - if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); - if (ZSTD_isError(fhSize)) return fhSize; + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; op += fhSize; cctx->stage = ZSTDcs_ongoing; @@ -2655,15 +3380,16 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->stage != ZSTDcs_ending) { /* write one last empty block, make it the "last" block */ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - if (dstCapacity<4) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); MEM_writeLE32(op, cBlockHeader24); op += ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize; } - if (cctx->params.fParams.checksumFlag) { + if (cctx->appliedParams.fParams.checksumFlag) { U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - if (dstCapacity<4) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); MEM_writeLE32(op, checksum); op += 4; } @@ -2672,159 +3398,364 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) return op-ostart; } - size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1); - if (ZSTD_isError(cSize)) return cSize; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize); endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - if (ZSTD_isError(endResult)) return endResult; + FORWARD_IF_ERROR(endResult); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } return cSize + endResult; } static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params) + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) { - CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize)); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + cctxParams); } -size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, ZSTD_parameters params) { - CHECK_F(ZSTD_checkCParams(params.cParams)); - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); + return ZSTD_compress_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + params); } -size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params) { - ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0); - params.fParams.contentSizeFlag = 1; - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) ); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + assert(params.fParams.contentSizeFlag == 1); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); } -size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) { - return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); } -size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) { size_t result; ZSTD_CCtx ctxBody; - memset(&ctxBody, 0, sizeof(ctxBody)); - memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem)); + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); - ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */ + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ return result; } /* ===== Dictionary API ===== */ -struct ZSTD_CDict_s { - void* dictContent; - size_t dictContentSize; - ZSTD_CCtx* refContext; -}; /* typedef'd tp ZSTD_CDict within "zstd.h" */ +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support sizeof on NULL */ - return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize; + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(cParams)); + cdict->matchState.cParams = cParams; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictBuffer = NULL; + cdict->dictContent = dictBuffer; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); + cdict->dictBuffer = internalBuffer; + cdict->dictContent = internalBuffer; + RETURN_ERROR_IF(!internalBuffer, memory_allocation); + memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + { void* const end = ZSTD_reset_matchState( + &cdict->matchState, + (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, + &cParams, ZSTDcrp_continue, /* forCCtx */ 0); + assert(end == (char*)cdict->workspace + cdict->workspaceSize); + (void)end; + } + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is <= 8 bytes. + */ + { ZSTD_CCtx_params params; + memset(¶ms, 0, sizeof(params)); + params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + params.cParams = cParams; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, ¶ms, + cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->workspace); + FORWARD_IF_ERROR(dictID); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem) +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) { - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem); - void* const dictContent = ZSTD_malloc(dictSize, customMem); - ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem); + { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); + size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + void* const workspace = ZSTD_malloc(workspaceSize, customMem); - if (!dictContent || !cdict || !cctx) { - ZSTD_free(dictContent, customMem); + if (!cdict || !workspace) { ZSTD_free(cdict, customMem); - ZSTD_free(cctx, customMem); + ZSTD_free(workspace, customMem); return NULL; } - - if (dictSize) { - memcpy(dictContent, dict, dictSize); + cdict->customMem = customMem; + cdict->workspace = workspace; + cdict->workspaceSize = workspaceSize; + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + cParams) )) { + ZSTD_freeCDict(cdict); + return NULL; } - { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0); - if (ZSTD_isError(errorCode)) { - ZSTD_free(dictContent, customMem); - ZSTD_free(cdict, customMem); - ZSTD_free(cctx, customMem); - return NULL; - } } - cdict->dictContent = dictContent; - cdict->dictContentSize = dictSize; - cdict->refContext = cctx; return cdict; } } ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize); - params.fParams.contentSizeFlag = 1; - return ZSTD_createCDict_advanced(dict, dictSize, params, allocator); + ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); } size_t ZSTD_freeCDict(ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = cdict->refContext->customMem; - ZSTD_freeCCtx(cdict->refContext); - ZSTD_free(cdict->dictContent, cMem); + { ZSTD_customMem const cMem = cdict->customMem; + ZSTD_free(cdict->workspace, cMem); + ZSTD_free(cdict->dictBuffer, cMem); ZSTD_free(cdict, cMem); return 0; } } -static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) { - return ZSTD_getParamsFromCCtx(cdict->refContext); +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) + + HUF_WORKSPACE_SIZE + matchStateSize; + ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; + void* ptr; + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(cdict+1, dict, dictSize); + dict = cdict+1; + ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; + } else { + ptr = cdict+1; + } + cdict->workspace = ptr; + cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType, + cParams) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; } -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize) +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { - if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) - else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize)); - return 0; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); + { ZSTD_CCtx_params params = cctx->requestedParams; + params.cParams = ZSTD_getCParamsFromCDict(cdict); + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); + } + params.fParams = fParams; + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); + } +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } /*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. -* Note that compression level is decided during dictionary creation */ + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict) { - CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize)); - - if (cdict->refContext->params.fParams.contentSizeFlag==1) { - cctx->params.fParams.contentSizeFlag = 1; - cctx->frameContentSize = srcSize; - } - - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); } @@ -2833,459 +3764,654 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, * Streaming ********************************************************************/ -typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; - -struct ZSTD_CStream_s { - ZSTD_CCtx* cctx; - ZSTD_CDict* cdictLocal; - const ZSTD_CDict* cdict; - char* inBuff; - size_t inBuffSize; - size_t inToCompress; - size_t inBuffPos; - size_t inBuffTarget; - size_t blockSize; - char* outBuff; - size_t outBuffSize; - size_t outBuffContentSize; - size_t outBuffFlushedSize; - ZSTD_cStreamStage stage; - U32 checksum; - U32 frameEnded; - U64 pledgedSrcSize; - U64 inputProcessed; - ZSTD_parameters params; - ZSTD_customMem customMem; -}; /* typedef'd to ZSTD_CStream within "zstd.h" */ - ZSTD_CStream* ZSTD_createCStream(void) { - return ZSTD_createCStream_advanced(defaultCustomMem); + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); } -ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) { - ZSTD_CStream* zcs; - - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} - zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem); - if (zcs==NULL) return NULL; - memset(zcs, 0, sizeof(ZSTD_CStream)); - memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem)); - zcs->cctx = ZSTD_createCCtx_advanced(customMem); - if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; } - return zcs; +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); } size_t ZSTD_freeCStream(ZSTD_CStream* zcs) { - if (zcs==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = zcs->customMem; - ZSTD_freeCCtx(zcs->cctx); - ZSTD_freeCDict(zcs->cdictLocal); - ZSTD_free(zcs->inBuff, cMem); - ZSTD_free(zcs->outBuff, cMem); - ZSTD_free(zcs, cMem); - return 0; - } + return ZSTD_freeCCtx(zcs); /* same object */ } + /*====== Initialization ======*/ -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } -size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; } +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +size_t ZSTD_CStreamOutSize(void) { - if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */ - - if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) - else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} - zcs->inToCompress = 0; - zcs->inBuffPos = 0; - zcs->inBuffTarget = zcs->blockSize; - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; - zcs->frameEnded = 0; - zcs->pledgedSrcSize = pledgedSrcSize; - zcs->inputProcessed = 0; +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, + const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_resetCStream_internal"); + /* Finalize the compression parameters */ + params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, ZSTD_dtlm_fast, + cdict, + params, pledgedSrcSize, + ZSTDb_buffered) ); + + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + cctx->inBuffTarget = cctx->blockSize + + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; return 0; /* ready to go */ } -size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - /* allocate buffers */ - { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; - if (zcs->inBuffSize < neededInBuffSize) { - zcs->inBuffSize = neededInBuffSize; - ZSTD_free(zcs->inBuff, zcs->customMem); - zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem); - if (zcs->inBuff == NULL) return ERROR(memory_allocation); - } - zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); - } - if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) { - zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1; - ZSTD_free(zcs->outBuff, zcs->customMem); - zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem); - if (zcs->outBuff == NULL) return ERROR(memory_allocation); - } +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + return 0; +} +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. + * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + zcs->requestedParams = params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem); - if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); - zcs->cdict = zcs->cdictLocal; - } else zcs->cdict = NULL; - - zcs->checksum = params.fParams.checksumFlag > 0; - zcs->params = params; + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + } + return 0; +} - return ZSTD_resetCStream(zcs, pledgedSrcSize); +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + return 0; } /* note : cdict must outlive compression session */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { - ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); - size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0); - zcs->cdict = cdict; - return initError; + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + return 0; } -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); - return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0); + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + return 0; } -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); - return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + return 0; } -size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) { - return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel); + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + return 0; } -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) { - if (zcs==NULL) return 0; /* support sizeof on NULL */ - return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize; + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + return 0; } /*====== Compression ======*/ -typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e; +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) { size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); + if (length) memcpy(dst, src, length); return length; } +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - void* dst, size_t* dstCapacityPtr, - const void* src, size_t* srcSizePtr, - ZSTD_flush_e const flush) + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) { + const char* const istart = (const char*)input->src; + const char* const iend = istart + input->size; + const char* ip = istart + input->pos; + char* const ostart = (char*)output->dst; + char* const oend = ostart + output->size; + char* op = ostart + output->pos; U32 someMoreWork = 1; - const char* const istart = (const char*)src; - const char* const iend = istart + *srcSizePtr; - const char* ip = istart; - char* const ostart = (char*)dst; - char* const oend = ostart + *dstCapacityPtr; - char* op = ostart; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + assert(output->pos <= output->size); + assert(input->pos <= input->size); while (someMoreWork) { - switch(zcs->stage) + switch(zcs->streamStage) { - case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); case zcss_load: - /* complete inBuffer */ + if ( (flushMode == ZSTD_e_end) + && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer */ { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); zcs->inBuffPos += loaded; ip += loaded; - if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) { - someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */ - } } + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); { void* cDst; size_t cSize; size_t const iSize = zcs->inBuffPos - zcs->inToCompress; size_t oSize = oend-op; + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress directly into output buffer (avoid flush stage) */ + cDst = op; /* compress into output buffer, to skip flush stage */ else cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = (flush == zsf_end) ? - ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) : - ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); - if (ZSTD_isError(cSize)) return cSize; - if (flush == zsf_end) zcs->frameEnded = 1; + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize); + zcs->frameEnded = lastBlock; /* prepare next block */ zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */ + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { op += cSize; break; } /* no need to flush */ + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } zcs->outBuffContentSize = cSize; zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_flush; /* pass-through to flush stage */ + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ } - + /* fall-through */ case zcss_flush: + DEBUGLOG(5, "flush stage"); { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + size_t const flushed = ZSTD_limitCopy(op, oend-op, + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); op += flushed; zcs->outBuffFlushedSize += flushed; - if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */ + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; break; } - case zcss_final: - someMoreWork = 0; /* do nothing */ - break; - - default: - return ERROR(GENERIC); /* impossible */ + default: /* impossible */ + assert(0); } } - *srcSizePtr = ip - istart; - *dstCapacityPtr = op - ostart; - zcs->inputProcessed += *srcSizePtr; + input->pos = ip - istart; + output->pos = op - ostart; if (zcs->frameEnded) return 0; - { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; - if (hintInSize==0) hintInSize = zcs->blockSize; - return hintInSize; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); } +#endif + return ZSTD_nextInputSizeHint(cctx); + } size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - size_t sizeRead = input->size - input->pos; - size_t sizeWritten = output->size - output->pos; - size_t const result = ZSTD_compressStream_generic(zcs, - (char*)(output->dst) + output->pos, &sizeWritten, - (const char*)(input->src) + input->pos, &sizeRead, zsf_gather); - input->pos += sizeRead; - output->pos += sizeWritten; - return result; + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, GENERIC); + RETURN_ERROR_IF(input->pos > input->size, GENERIC); + assert(cctx!=NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */ + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + cctx->streamStage = zcss_load; + cctx->appliedParams.nbWorkers = params.nbWorkers; + } else +#endif + { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, + params, cctx->pledgedSrcSizePlusOne-1) ); + assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbWorkers == 0); + } } + /* end of transparent initialization stage */ + + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + size_t flushMin; + assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + do { + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + FORWARD_IF_ERROR(flushMin); + } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ } +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + FORWARD_IF_ERROR(result); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} /*====== Finalize ======*/ /*! ZSTD_flushStream() : -* @return : amount of data remaining to flush */ + * @return : amount of data remaining to flush */ size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const result = ZSTD_compressStream_generic(zcs, - (char*)(output->dst) + output->pos, &sizeWritten, - &srcSize, &srcSize, /* use a valid src address instead of NULL */ - zsf_flush); - output->pos += sizeWritten; - if (ZSTD_isError(result)) return result; - return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); } size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { - BYTE* const ostart = (BYTE*)(output->dst) + output->pos; - BYTE* const oend = (BYTE*)(output->dst) + output->size; - BYTE* op = ostart; - - if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize)) - return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */ - - if (zcs->stage != zcss_final) { - /* flush whatever remains */ - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */ - size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - op += sizeWritten; - if (remainingToFlush) { - output->pos += sizeWritten; - return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4); - } - /* create epilogue */ - zcs->stage = zcss_final; - zcs->outBuffContentSize = !notEnded ? 0 : - ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */ - } - - /* flush epilogue */ - { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - op += flushed; - zcs->outBuffFlushedSize += flushed; - output->pos += op-ostart; - if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */ - return toFlush - flushed; + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush ); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; } } - /*-===== Pre-defined compression levels =====-*/ -#define ZSTD_DEFAULT_CLEVEL 1 #define ZSTD_MAX_CLEVEL 22 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" */ +{ /* "default" - for any srcSize > 256 KB */ /* W, C, H, S, L, TL, strat */ - { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */ - { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */ - { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */ - { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/ - { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/ - { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */ - { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ - { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */ - { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */ - { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */ - { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */ - { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */ - { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */ - { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */ - { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */ - { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */ - { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ }, { /* for srcSize <= 256 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */ - { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */ - { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */ - { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */ - { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/ - { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/ - { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/ - { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */ - { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/ - { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/ - { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */ - { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/ - { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ - { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ - { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ - { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/ - { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/ - { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ }, { /* for srcSize <= 128 KB */ /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */ - { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */ - { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */ - { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */ - { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */ - { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */ - { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ - { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */ - { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */ - { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/ - { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ - { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ - { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ - { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ - { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/ - { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/ - { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ }, { /* for srcSize <= 16 KB */ /* W, C, H, S, L, T, strat */ - { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */ - { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */ - { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */ - { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/ - { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/ - { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ - { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ - { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ - { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ - { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/ - { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/ - { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ }, }; /*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`. -* Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) -{ - ZSTD_compressionParameters cp; - size_t const addedSize = srcSize ? 0 : 500; - U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ - if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */ - if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; - cp = ZSTD_defaultCParameters[tableID][compressionLevel]; - if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ - if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX; - if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX; - if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX; - } - cp = ZSTD_adjustCParams(cp, srcSize, dictSize); - return cp; + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + size_t const addedSize = srcSizeHint ? 0 : 500; + U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row = compressionLevel; + DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ + } } /*! ZSTD_getParams() : -* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). -* All fields of `ZSTD_frameParameters` are set to default (0) */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) { + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); memset(¶ms, 0, sizeof(params)); params.cParams = cParams; + params.fParams.contentSizeFlag = 1; return params; } diff --git a/Perl/shared/zstd/compress/zstd_compress_internal.h b/Perl/shared/zstd/compress/zstd_compress_internal.h new file mode 100644 index 000000000..cc3cbb9da --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_compress_internal.h @@ -0,0 +1,863 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy + Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + U32 CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 nextToUpdate3; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t * dictMatchState; + ZSTD_compressionParameters cParams; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. + * Depends on ldmParams.minMatchLength */ +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + U32 offset; + U32 litLength; + U32 matchLength; +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The position where reading stopped. <= size. */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). + * `mlBase` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) +{ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + ZSTD_dictMatchState : + ZSTD_noDict; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window.base); + return current > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const newCurrent = (current & cycleMask) + maxDist; + U32 const correction = current - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(current > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + window->lowLimit -= correction; + window->dictLimit -= correction; + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * This allows a simple check that index >= lowLimit to see if index is valid. + * This must be called before a block compression call, with srcEnd as the block + * source end. + * + * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. + * This is because dictionaries are allowed to be referenced as long as the last + * byte of the dictionary is in the window, but once they are out of range, + * they cannot be referenced. If loadedDictEndPtr is NULL, we use + * loadedDictEnd == 0. + * + * In normal dict mode, the dict is between lowLimit and dictLimit. In + * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary + * is below them. forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + void const* srcEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base); + U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", + (unsigned)blockEndIdx, (unsigned)maxDist); + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + if (loadedDictEndPtr) + *loadedDictEndPtr = 0; + if (dictMatchStatePtr) + *dictMatchStatePtr = NULL; + } +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. + */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + // ms->nextToUpdate = window->dictLimit; + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + +#if defined (__cplusplus) +} +#endif + + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = current + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = current + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } + } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(current - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } + } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + +_search_next_long: + + { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } + } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(current - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + prefixStartIndex; + const U32 dictStartIndex = ms->window.lowLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/Perl/shared/zstd/compress/zstd_double_fast.h b/Perl/shared/zstd/compress/zstd_double_fast.h new file mode 100644 index 000000000..4fa31acfc --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_double_fast.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/Perl/shared/zstd/compress/zstd_fast.c b/Perl/shared/zstd/compress/zstd_fast.c new file mode 100644 index 000000000..ed997b441 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_fast.c @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_fast.h" + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = current; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = current + p; + } } } } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { + U32 const maxRep = (U32)(ip0 - prefixStart); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2-offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = ip2[-1] == repMatch[-1] ? 1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { + size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 0; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; + ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + ip1 = ip0 + 1; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + while ( (ip0 <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ip1 = ip0 + 1; + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32 const mls = cParams->minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* otherwise, we would get index underflow when translating a dict index + * into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = current; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+current+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32 const mls = cParams->minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 dictStartIndex = ms->window.lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + assert(offset_1 <= current +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32 const mls = cParams->minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/Perl/shared/zstd/compress/zstd_fast.h b/Perl/shared/zstd/compress/zstd_fast.h new file mode 100644 index 000000000..b74a88c57 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_fast.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/Perl/shared/zstd/compress/zstd_lazy.c b/Perl/shared/zstd/compress/zstd_lazy.c new file mode 100644 index 000000000..53f998a43 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_lazy.c @@ -0,0 +1,1106 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : current >= btlow == (current - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 current, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; + const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + current, dictLimit, windowLow); + assert(current >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (current < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (current < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + current, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const current = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const windowLow = ms->window.lowLimit; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); + assert(ip <= iend-8); /* required for h calculation */ + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = ms->window.lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dictMatchState) { + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? + (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? + dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + + /* init */ + ip += (dictAndPrefixLength == 0); + ms->nextToUpdate3 = ms->nextToUpdate; + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (dictMode == ZSTD_dictMatchState) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (dictMode == ZSTD_dictMatchState) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? offset_2 : savedOffset; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const U32 lowestIndex = ms->window.lowLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + /* init */ + ms->nextToUpdate3 = ms->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2); +} diff --git a/Perl/shared/zstd/compress/zstd_lazy.h b/Perl/shared/zstd/compress/zstd_lazy.h new file mode 100644 index 000000000..bb1763069 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_lazy.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ diff --git a/Perl/shared/zstd/compress/zstd_ldm.c b/Perl/shared/zstd/compress/zstd_ldm.c new file mode 100644 index 000000000..784d20f3a --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_ldm.c @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_ldm.h" + +#include "debug.h" +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 +#define LDM_HASH_CHAR_OFFSET 10 + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (cParams->strategy >= ZSTD_btopt) { + /* Get out of the way of the optimal parser */ + U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); + assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); + assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); + params->minMatchLength = minMatch; + } + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = + ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * If numBits==0, returns 0. + * @return : the most significant numBits of value. */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); +} + +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} + +/** ZSTD_ldm_getTag() ; + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + assert(numTagBits < 32 && hbits <= 32); + if (32 - hbits < numTagBits) { + return hash & (((U32)1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + } +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; +} + +/** ZSTD_ldm_makeEntryAndInsertByTag() : + * + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmParams.hashRateLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. The checksum is the next 32 most significant bits, followed + * by ldmParams.hashRateLog bits that make up the tag. */ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 const rollingHash, + U32 const hBits, + U32 const offset, + ldmParams_t const ldmParams) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); + U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); + } +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. + * + * Returns the rolling hash corresponding to position iend-1. */ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits, ldmParams_t const ldmParams) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + + while (cur < iend) { + rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], + cur[ldmParams.minMatchLength-1], + state->hashPower); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base), ldmParams); + ++cur; + } + return rollingHash; +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - ms->window.base); + if (current > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + current - MIN(512, current - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U64 const hashPower = ldmState->hashPower; + U32 const hBits = params->hashLog - params->bucketSizeLog; + U32 const ldmBucketSize = 1U << params->bucketSizeLog; + U32 const hashRateLog = params->hashRateLog; + U32 const ldmTagMask = (1U << params->hashRateLog) - 1; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash */ + BYTE const* lastHashed = NULL; + U64 rollingHash = 0; + + while (ip <= ilimit) { + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], + lastHashed[minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); + } + lastHashed = ip; + + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + *params); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? dictStart : lowPrefixPtr; + + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowPrefixPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, + hBits, current, + *params); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + { + /* Store the sequence: + * ip = current - backwardMatchLength + * The match is at (bestEntry->offset - backwardMatchLength) + */ + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(ip - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + *params); + + assert(ip + backwardMatchLength == lastHashed); + + /* Fill the hash table from lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength <= ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, *params); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + } + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, src); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the lits */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(sequence.offset <= (1U << cParams->windowLog)); + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} diff --git a/Perl/shared/zstd/compress/zstd_ldm.h b/Perl/shared/zstd/compress/zstd_ldm.h new file mode 100644 index 000000000..a47846128 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_ldm.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ +#include "zstd.h" /* ZSTD_CCtx, size_t */ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/Perl/shared/zstd/compress/zstd_opt.c b/Perl/shared/zstd/compress/zstd_opt.c new file mode 100644 index 000000000..efb69d326 --- /dev/null +++ b/Perl/shared/zstd/compress/zstd_opt.c @@ -0,0 +1,1233 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "hist.h" +#include "zstd_opt.h" + + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif + +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} + +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} + +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} +#endif + +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} + + +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} + +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_litLengthContribution() : + * @return ( cost(litlength) - cost(0) ) + * this value can then be added to rawLiteralsCost() + * to provide a cost which is directly comparable to a match ending at same position */ +static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) + + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); +#if 1 + return contribution; +#else + return MAX(0, contribution); /* sometimes better, sometimes not ... */ +#endif + } +} + +/* ZSTD_literalsContribution() : + * creates a fake cost for the literals part of a sequence + * which can be compared to the ending cost of a match + * should a new match start at this position */ +static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + + ZSTD_litLengthContribution(litLength, optPtr, optLevel); + return contribution; +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. + * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = ms->nextToUpdate3; + U32 const target = ms->nextToUpdate3 = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . + * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = current+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = current; /* Update Hash Table */ + + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return matchEndIdx - (current + 8); +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) + idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + ZSTD_match_t* matches, + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = btMask >= current ? 0 : current - btMask; + U32 const windowLow = ms->window.lowLimit; + U32 const matchLow = windowLow ? windowLow : 1; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? + dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(current >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); + if ((matchIndex3 >= matchLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = current+1; /* skip insertion */ + return 1; + } + } + } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + assert(current > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > current+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ms->nextToUpdate3 = ms->nextToUpdate; + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch); + U32 matchNb; + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + /* repcodes update : like ZSTD_updateRep(), but update in place */ + if (offCode >= ZSTD_REP_NUM) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offCode - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offCode + (llen==0); + if (repCode) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + if (repCode >= 2) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + + } /* while (ip < ilimit) */ + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected. + */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + ms->nextToUpdate3 = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const current = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ diff --git a/Perl/shared/zstd/compress/zstd_opt.h b/Perl/shared/zstd/compress/zstd_opt.h index f071c4f30..094f74766 100644 --- a/Perl/shared/zstd/compress/zstd_opt.h +++ b/Perl/shared/zstd/compress/zstd_opt.h @@ -1,919 +1,56 @@ -/** - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H -/* Note : this file is intended to be included within zstd_compress.c */ +#if defined (__cplusplus) +extern "C" { +#endif +#include "zstd_compress_internal.h" -#ifndef ZSTD_OPT_H_91842398743 -#define ZSTD_OPT_H_91842398743 +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); -#define ZSTD_LITFREQ_ADD 2 -#define ZSTD_FREQ_DIV 4 -#define ZSTD_MAX_PRICE (1<<30) -/*-************************************* -* Price functions for optimal parser -***************************************/ -FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) -{ - ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1); - ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1); - ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1); - ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); - ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); -} - - -MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize) -{ - unsigned u; - - ssPtr->cachedLiterals = NULL; - ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; - ssPtr->staticPrices = 0; - - if (ssPtr->litLengthSum == 0) { - if (srcSize <= 1024) ssPtr->staticPrices = 1; - - for (u=0; u<=MaxLit; u++) - ssPtr->litFreq[u] = 0; - for (u=0; ulitFreq[src[u]]++; - - ssPtr->litSum = 0; - ssPtr->litLengthSum = MaxLL+1; - ssPtr->matchLengthSum = MaxML+1; - ssPtr->offCodeSum = (MaxOff+1); - ssPtr->matchSum = (ZSTD_LITFREQ_ADD<litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; - } else { - ssPtr->matchLengthSum = 0; - ssPtr->litLengthSum = 0; - ssPtr->offCodeSum = 0; - ssPtr->matchSum = 0; - ssPtr->litSum = 0; - - for (u=0; u<=MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) { - ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); - ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; - } - for (u=0; u<=MaxML; u++) { - ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; - ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); - } - ssPtr->matchSum *= ZSTD_LITFREQ_ADD; - for (u=0; u<=MaxOff; u++) { - ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; - } - } - - ZSTD_setLog2Prices(ssPtr); -} - - -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) -{ - U32 price, u; - - if (ssPtr->staticPrices) - return ZSTD_highbit32((U32)litLength+1) + (litLength*6); - - if (litLength == 0) - return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); - - /* literals */ - if (ssPtr->cachedLiterals == literals) { - U32 const additional = litLength - ssPtr->cachedLitLength; - const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; - price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; - for (u=0; u < additional; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1); - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } else { - price = litLength * ssPtr->log2litSum; - for (u=0; u < litLength; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1); - - if (litLength >= 12) { - ssPtr->cachedLiterals = literals; - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } - } - - /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1); - } - - return price; -} - - -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) -{ - /* offset */ - U32 price; - BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - - if (seqStorePtr->staticPrices) - return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; - - price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); - if (!ultra && offCode >= 20) price += (offCode-19)*2; - - /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1); - } - - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; -} - - -MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD; - for (u=0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - - /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - seqStorePtr->litLengthFreq[llCode]++; - seqStorePtr->litLengthSum++; - } - - /* match offset */ - { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - seqStorePtr->offCodeSum++; - seqStorePtr->offCodeFreq[offCode]++; - } - - /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - seqStorePtr->matchLengthFreq[mlCode]++; - seqStorePtr->matchLengthSum++; - } - - ZSTD_setLog2Prices(seqStorePtr); -} - - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - } - - - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (ie. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) -{ - U32* const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE* const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - return hashTable3[hash3]; -} - - -/*-************************************* -* Binary Tree search -***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) -{ - const BYTE* const base = zc->base; - const U32 current = (U32)(ip-base); - const U32 hashLog = zc->params.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const hashTable = zc->hashTable; - U32 matchIndex = hashTable[h]; - U32* const bt = zc->chainTable; - const U32 btLog = zc->params.cParams.chainLog - 1; - const U32 btMask= (1U << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - - const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatchLen-1; - - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex3; - if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; - matches[mnum].len = (U32)currentMl; - mnum++; - if (currentMl > ZSTD_OPT_NUM) goto update; - if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ - } - } - } - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) { - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; - } - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; - matches[mnum].len = (U32)matchLength; - mnum++; - if (matchLength > ZSTD_OPT_NUM) break; - if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - -update: - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return mnum; -} - - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - - -/*-******************************* -* Optimal parser -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const BYTE* const prefixStart = base + ctx->dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = seqStorePtr->priceTable; - ZSTD_match_t* matches = seqStorePtr->matchTable; - const BYTE* inr; - U32 offset, rep[ZSTD_REP_NUM]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - { U32 i; for (i=0; irep[i]; } - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i=(ip == anchor); i 0) && (repCur < (S32)(ip-prefixStart)) - && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { - mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - best_off = i - (ip == anchor); - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); - - if (!last_pos && !match_num) { ip++; continue; } - - if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - /* set prices using matches at position = 0 */ - best_mlen = (last_pos) ? last_pos : minMatch; - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } } - - if (last_pos < minMatch) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) - && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - if (litLength==0) offset--; - } - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { int i; for (i=0; isavedRep[i] = rep[i]; } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -FORCE_INLINE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = seqStorePtr->priceTable; - ZSTD_match_t* matches = seqStorePtr->matchTable; - const BYTE* inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { U32 i; for (i=0; irep[i]; } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 current = (U32)(ip-base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i = (ip==anchor); i 0 && repCur <= (S32)current) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip==anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { ip++; continue; } - - { U32 i; for (i=0; i sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } } - - if (last_pos < minMatch) { - ip++; continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i 0 && repCur <= (S32)(current+cur)) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && matches[match_num-1].len > sufficient_len) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos; ) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength==0) offset--; - } +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); - /* Save reps for next block */ - { int i; for (i=0; isavedRep[i] = rep[i]; } + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ - /* Last Literals */ - { size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } +#if defined (__cplusplus) } +#endif -#endif /* ZSTD_OPT_H_91842398743 */ +#endif /* ZSTD_OPT_H */ diff --git a/Perl/shared/zstd/compress/zstdmt_compress.c b/Perl/shared/zstd/compress/zstdmt_compress.c new file mode 100644 index 000000000..38fbb9076 --- /dev/null +++ b/Perl/shared/zstd/compress/zstdmt_compress.c @@ -0,0 +1,2099 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Constants ====== */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 0 + + +/* ====== Dependencies ====== */ +#include /* memcpy, memset */ +#include /* INT_MAX, UINT_MAX */ +#include "mem.h" /* MEM_STATIC */ +#include "pool.h" /* threadpool */ +#include "threading.h" /* mutex */ +#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ +#include "zstd_ldm.h" +#include "zstdmt_compress.h" + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. + */ +#define ZSTD_RESIZE_SEQPOOL 0 + +/* ====== Debug ====== */ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \ + && !defined(_MSC_VER) \ + && !defined(__MINGW32__) + +# include +# include +# include + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + RAWLOG(l, " \n"); \ +} + +static unsigned long long GetCurrentClockTimeMicroseconds(void) +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); +} } + +#define MUTEX_WAIT_TIME_DLEVEL 6 +#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \ + if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ + ZSTD_pthread_mutex_lock(mutex); \ + { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long const elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ + } } \ + } else { \ + ZSTD_pthread_mutex_lock(mutex); \ + } \ +} + +#else + +# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} + +#endif + + +/* ===== Buffer Pool ===== */ +/* a single Buffer Pool can be invoked from multiple threads in parallel */ + +typedef struct buffer_s { + void* start; + size_t capacity; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 }; + +typedef struct ZSTDMT_bufferPool_s { + ZSTD_pthread_mutex_t poolMutex; + size_t bufferSize; + unsigned totalBuffers; + unsigned nbBuffers; + ZSTD_customMem cMem; + buffer_t bTable[1]; /* variable size */ +} ZSTDMT_bufferPool; + +static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc( + sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); + if (bufPool==NULL) return NULL; + if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { + ZSTD_free(bufPool, cMem); + return NULL; + } + bufPool->bufferSize = 64 KB; + bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; + bufPool->cMem = cMem; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool); + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; utotalBuffers; u++) { + DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start); + ZSTD_free(bufPool->bTable[u].start, bufPool->cMem); + } + ZSTD_pthread_mutex_destroy(&bufPool->poolMutex); + ZSTD_free(bufPool, bufPool->cMem); +} + +/* only works at initialization, not during compression */ +static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) +{ + size_t const poolSize = sizeof(*bufPool) + + (bufPool->totalBuffers - 1) * sizeof(buffer_t); + unsigned u; + size_t totalBufferSize = 0; + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + for (u=0; utotalBuffers; u++) + totalBufferSize += bufPool->bTable[u].capacity; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + + return poolSize + totalBufferSize; +} + +/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_free(buf.start, bufPool->cMem); + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); + { buffer_t buffer; + void* const start = ZSTD_malloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_malloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 0 : bSize; + if (start != NULL) { + assert(newBuffer.capacity >= buffer.capacity); + memcpy(newBuffer.start, buffer.start, buffer.capacity); + DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); + return newBuffer; + } + DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!"); + } + return buffer; +} +#endif + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + if (buf.start == NULL) return; /* compatible with release on NULL */ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return; + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* Reached bufferPool capacity (should not happen) */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); + ZSTD_free(buf.start, bufPool->cMem); +} + + +/* ===== Seq Pool Wrapper ====== */ + +static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0}; + +typedef ZSTDMT_bufferPool ZSTDMT_seqPool; + +static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) +{ + return ZSTDMT_sizeof_bufferPool(seqPool); +} + +static rawSeqStore_t bufferToSeq(buffer_t buffer) +{ + rawSeqStore_t seq = {NULL, 0, 0, 0}; + seq.seq = (rawSeq*)buffer.start; + seq.capacity = buffer.capacity / sizeof(rawSeq); + return seq; +} + +static buffer_t seqToBuffer(rawSeqStore_t seq) +{ + buffer_t buffer; + buffer.start = seq.seq; + buffer.capacity = seq.capacity * sizeof(rawSeq); + return buffer; +} + +static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool) +{ + if (seqPool->bufferSize == 0) { + return kNullRawSeqStore; + } + return bufferToSeq(ZSTDMT_getBuffer(seqPool)); +} + +#if ZSTD_RESIZE_SEQPOOL +static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq))); +} +#endif + +static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq)); +} + +static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq) +{ + ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq)); +} + +static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (seqPool == NULL) return NULL; + ZSTDMT_setNbSeq(seqPool, 0); + return seqPool; +} + +static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool) +{ + ZSTDMT_freeBufferPool(seqPool); +} + +static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers) +{ + return ZSTDMT_expandBufferPool(pool, nbWorkers); +} + + +/* ===== CCtx Pool ===== */ +/* a single CCtx Pool can be invoked from multiple threads in parallel */ + +typedef struct { + ZSTD_pthread_mutex_t poolMutex; + int totalCCtx; + int availCCtx; + ZSTD_customMem cMem; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + int cid; + for (cid=0; cidtotalCCtx; cid++) + ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */ + ZSTD_pthread_mutex_destroy(&pool->poolMutex); + ZSTD_free(pool, pool->cMem); +} + +/* ZSTDMT_createCCtxPool() : + * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */ +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, + ZSTD_customMem cMem) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc( + sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); + assert(nbWorkers > 0); + if (!cctxPool) return NULL; + if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { + ZSTD_free(cctxPool, cMem); + return NULL; + } + cctxPool->cMem = cMem; + cctxPool->totalCCtx = nbWorkers; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers); + return cctxPool; +} + +static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool, + int nbWorkers) +{ + if (srcPool==NULL) return NULL; + if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */ + /* need a larger cctx pool */ + { ZSTD_customMem const cMem = srcPool->cMem; + ZSTDMT_freeCCtxPool(srcPool); + return ZSTDMT_createCCtxPool(nbWorkers, cMem); + } +} + +/* only works during initialization phase, not during compression */ +static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool) +{ + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + { unsigned const nbWorkers = cctxPool->totalCCtx; + size_t const poolSize = sizeof(*cctxPool) + + (nbWorkers-1) * sizeof(ZSTD_CCtx*); + unsigned u; + size_t totalCCtxSize = 0; + for (u=0; ucctx[u]); + } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + assert(nbWorkers > 0); + return poolSize + totalCCtxSize; + } +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool) +{ + DEBUGLOG(5, "ZSTDMT_getCCtx"); + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + if (cctxPool->availCCtx) { + cctxPool->availCCtx--; + { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + return cctx; + } } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + DEBUGLOG(5, "create one more CCtx"); + return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */ +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return; /* compatibility with release on NULL */ + ZSTD_pthread_mutex_lock(&pool->poolMutex); + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else { + /* pool overflow : should not happen, since totalCCtx==nbWorkers */ + DEBUGLOG(4, "CCtx pool overflow : free cctx"); + ZSTD_freeCCtx(cctx); + } + ZSTD_pthread_mutex_unlock(&pool->poolMutex); +} + +/* ==== Serial State ==== */ + +typedef struct { + void const* start; + size_t size; +} range_t; + +typedef struct { + /* All variables in the struct are protected by mutex. */ + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + ZSTD_CCtx_params params; + ldmState_t ldmState; + XXH64_state_t xxhState; + unsigned nextJobID; + /* Protects ldmWindow. + * Must be acquired after the main mutex when acquiring both. + */ + ZSTD_pthread_mutex_t ldmWindowMutex; + ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */ + ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ +} serialState_t; + +static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize) +{ + /* Adjust parameters */ + if (params.ldmParams.enableLdm) { + DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10); + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + serialState->ldmState.hashPower = + ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); + } else { + memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); + } + serialState->nextJobID = 0; + if (params.fParams.checksumFlag) + XXH64_reset(&serialState->xxhState, 0); + if (params.ldmParams.enableLdm) { + ZSTD_customMem cMem = params.customMem; + unsigned const hashLog = params.ldmParams.hashLog; + size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t); + unsigned const bucketLog = + params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; + size_t const bucketSize = (size_t)1 << bucketLog; + unsigned const prevBucketLog = + serialState->params.ldmParams.hashLog - + serialState->params.ldmParams.bucketSizeLog; + /* Size the seq pool tables */ + ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); + /* Reset the window */ + ZSTD_window_clear(&serialState->ldmState.window); + serialState->ldmWindow = serialState->ldmState.window; + /* Resize tables and output space if necessary. */ + if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { + ZSTD_free(serialState->ldmState.hashTable, cMem); + serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem); + } + if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { + ZSTD_free(serialState->ldmState.bucketOffsets, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem); + } + if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) + return 1; + /* Zero the tables */ + memset(serialState->ldmState.hashTable, 0, hashSize); + memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + } + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_free(serialState->ldmState.hashTable, cMem); + ZSTD_free(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. + */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ +} + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < oend); + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); + assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + lastCBlockSize = cSize; + } } + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0); + job->cSize += lastCBlockSize; + job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */ + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +#define RSYNC_LENGTH 32 + +typedef struct { + U64 hash; + U64 hitMask; + U64 primePower; +} rsyncState_t; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */ + inBuff_t inBuff; + roundBuff_t roundBuff; + serialState_t serial; + rsyncState_t rsync; + unsigned singleBlockingThread; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned long long frameContentSize; + unsigned long long consumed; + unsigned long long produced; + ZSTD_customMem cMem; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; +}; + +static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) +{ + U32 jobNb; + if (jobTable == NULL) return; + for (jobNb=0; jobNb mtctx->jobIDMask+1) { /* need more job capacity */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + mtctx->jobIDMask = 0; + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem); + if (mtctx->jobs==NULL) return ERROR(memory_allocation); + assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + } + return 0; +} + + +/* ZSTDMT_CCtxParam_setNbWorkers(): + * Internal use only */ +size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +{ + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); +} + +MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_CCtx* mtctx; + U32 nbJobs = nbWorkers + 2; + int initError; + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers); + + if (nbWorkers < 1) return NULL; + nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX); + if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) + /* invalid custom allocator */ + return NULL; + + mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); + if (!mtctx) return NULL; + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + mtctx->cMem = cMem; + mtctx->allJobsCompleted = 1; + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); + assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem); + mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem); + initError = ZSTDMT_serialState_init(&mtctx->serial); + mtctx->roundBuff = kNullRoundBuff; + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers); + return mtctx; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem); +#else + (void)nbWorkers; + (void)cMem; + return NULL; +#endif +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) +{ + return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); +} + + +/* ZSTDMT_releaseAllJobResources() : + * note : ensure all workers are killed first ! */ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + } + memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_free(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + +/* Internal only */ +size_t +ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, + ZSTDMT_parameter parameter, + int value) +{ + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter"); + switch(parameter) + { + case ZSTDMT_p_jobSize : + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value); + case ZSTDMT_p_overlapLog : + DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value); + case ZSTDMT_p_rsyncable : + DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value); + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value); + default : + return ERROR(parameter_unsupported); + } +} + +size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value) +{ + DEBUGLOG(4, "ZSTDMT_setMTCtxParameter"); + return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); +} + +size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value) +{ + switch (parameter) { + case ZSTDMT_p_jobSize: + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value); + case ZSTDMT_p_overlapLog: + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value); + case ZSTDMT_p_rsyncable: + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value); + default: + return ERROR(parameter_unsupported); + } +} + +/* Sets parameters relevant to the compression job, + * initializing others to default values. */ +static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) +{ + ZSTD_CCtx_params jobParams = params; + /* Clear parameters related to multithreading */ + jobParams.forceWindow = 0; + jobParams.nbWorkers = 0; + jobParams.jobSize = 0; + jobParams.overlapLog = 0; + jobParams.rsyncable = 0; + memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t)); + memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem)); + return jobParams; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + toFlush = produced - flushed; + if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { + /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) +{ + if (params.ldmParams.enableLdm) + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. */ + return MAX(21, params.cParams.chainLog + 4); + return MAX(20, params.cParams.windowLog + 2); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params.ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= 30); + DEBUGLOG(4, "overlapLog : %i", params.overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 0 : (size_t)1 << ovLog; +} + +static unsigned +ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) +{ + assert(nbWorkers>0); + { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); + size_t const jobMaxSize = jobSizeTarget << 2; + size_t const passSizeMax = jobMaxSize * nbWorkers; + unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1; + unsigned const nbJobsLarge = multiplier * nbWorkers; + unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1; + unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers); + return (multiplier>1) ? nbJobsLarge : nbJobsSmall; +} } + +/* ZSTDMT_compress_advanced_internal() : + * This is a blocking function : it will only give back control to caller after finishing its compression job. + */ +static size_t ZSTDMT_compress_advanced_internal( + ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params) +{ + ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); + size_t const overlapSize = ZSTDMT_computeOverlapSize(params); + unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); + size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; + size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ + const char* const srcStart = (const char*)src; + size_t remainingSrcSize = srcSize; + unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */ + size_t frameStartPos = 0, dstBufferPos = 0; + assert(jobParams.nbWorkers == 0); + assert(mtctx->cctxPool->totalCCtx == params.nbWorkers); + + params.jobSize = (U32)avgJobSize; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ", + nbJobs, (U32)proposedJobSize, (U32)avgJobSize); + + if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */ + ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); + if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); + } + + assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) ); + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) + return ERROR(memory_allocation); + + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ + + { unsigned u; + for (u=0; ujobs[u].prefix.start = srcStart + frameStartPos - dictSize; + mtctx->jobs[u].prefix.size = dictSize; + mtctx->jobs[u].src.start = srcStart + frameStartPos; + mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */ + mtctx->jobs[u].consumed = 0; + mtctx->jobs[u].cSize = 0; + mtctx->jobs[u].cdict = (u==0) ? cdict : NULL; + mtctx->jobs[u].fullFrameSize = srcSize; + mtctx->jobs[u].params = jobParams; + /* do not calculate checksum within sections, but write it in header for first section */ + mtctx->jobs[u].dstBuff = dstBuffer; + mtctx->jobs[u].cctxPool = mtctx->cctxPool; + mtctx->jobs[u].bufPool = mtctx->bufPool; + mtctx->jobs[u].seqPool = mtctx->seqPool; + mtctx->jobs[u].serial = &mtctx->serial; + mtctx->jobs[u].jobID = u; + mtctx->jobs[u].firstJob = (u==0); + mtctx->jobs[u].lastJob = (u==nbJobs-1); + + DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize); + DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12); + POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]); + + frameStartPos += jobSize; + dstBufferPos += dstBufferCapacity; + remainingSrcSize -= jobSize; + } } + + /* collect result */ + { size_t error = 0, dstPos = 0; + unsigned jobID; + for (jobID=0; jobIDjobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID); + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + DEBUGLOG(5, "ready to write job %u ", jobID); + + { size_t const cSize = mtctx->jobs[jobID].cSize; + if (ZSTD_isError(cSize)) error = cSize; + if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); + if (jobID) { /* note : job 0 is written directly at dst, which is correct position */ + if (!error) + memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */ + if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */ + DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + } } + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + dstPos += cSize ; + } + } /* for (jobID=0; jobIDserial.xxhState); + if (dstPos + 4 > dstCapacity) { + error = ERROR(dstSize_tooSmall); + } else { + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)dst + dstPos, checksum); + dstPos += 4; + } } + + if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); + return error ? error : dstPos; + } +} + +size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + int overlapLog) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX); + cctxParams.overlapLog = overlapLog; + return ZSTDMT_compress_advanced_internal(mtctx, + dst, dstCapacity, + src, srcSize, + cdict, cctxParams); +} + + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); + int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy); + params.fParams.contentSizeFlag = 1; + return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog); +} + + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +size_t ZSTDMT_initCStream_internal( + ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); + + /* params supposed partially fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + /* init */ + if (params.nbWorkers != mtctx->params.nbWorkers) + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); + + if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; + if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; + + mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ + if (mtctx->singleBlockingThread) { + ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); + DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); + assert(singleThreadParams.nbWorkers == 0); + return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], + dict, dictSize, cdict, + singleThreadParams, pledgedSrcSize); + } + + DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); + + if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + mtctx->allJobsCompleted = 1; + } + + mtctx->params = params; + mtctx->frameContentSize = pledgedSrcSize; + if (dict) { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); + mtctx->cdict = mtctx->cdictLocal; + if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = NULL; + mtctx->cdict = cdict; + } + + mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params); + DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10)); + mtctx->targetSectionSize = params.jobSize; + if (mtctx->targetSectionSize == 0) { + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); + } + if (params.rsyncable) { + /* Aim for the targetsectionSize as the average job size. */ + U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); + U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20; + assert(jobSizeMB >= 1); + DEBUGLOG(4, "rsyncLog = %u", rsyncBits); + mtctx->rsync.hash = 0; + mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1; + mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH); + } + if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize); + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10)); + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); + { + /* If ldm is enabled we need windowSize space. */ + size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. + */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize)) + return ERROR(memory_allocation); + return 0; +} + +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL, + cctxParams, pledgedSrcSize); +} + +size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */ + cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict); + cctxParams.fParams = fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict, + cctxParams, pledgedSrcSize); +} + + +/* ZSTDMT_resetCStream() : + * pledgedSrcSize can be zero == unknown (for the time being) + * prefer using ZSTD_CONTENTSIZE_UNKNOWN, + * as `0` might mean "empty" in the future */ +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) +{ + if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params, + pledgedSrcSize); +} + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) { + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. + */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { + mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. + */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = rangeStart + range.size; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. + */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. */ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled >= RSYNC_LENGTH) { + /* We have enough bytes buffered to initialize the hash. + * Start scanning at the beginning of the input. + */ + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + } else { + /* We don't have enough bytes buffered to initialize the hash, but + * we know we have at least RSYNC_LENGTH bytes total. + * Start scanning after the first RSYNC_LENGTH bytes less the bytes + * already buffered. + */ + pos = RSYNC_LENGTH - mtctx->inBuff.filled; + prev = (BYTE const*)mtctx->inBuff.buffer.start - pos; + hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH]; + /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + break; + } + } + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ + return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp); + } + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* single-pass shortcut (note : synchronous-mode) */ + if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */ + && (mtctx->nextJobID == 0) /* just started */ + && (mtctx->inBuff.filled == 0) /* nothing buffered */ + && (!mtctx->jobReady) /* no job already created */ + && (endOp == ZSTD_e_end) /* end order */ + && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */ + size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx, + (char*)output->dst + output->pos, output->size - output->pos, + (const char*)input->src + input->pos, input->size - input->pos, + mtctx->cdict, mtctx->params); + if (ZSTD_isError(cSize)) return cSize; + input->pos = input->size; + output->pos += cSize; + mtctx->allJobsCompleted = 1; + mtctx->frameEnded = 1; + return 0; + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. + */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) + endOp = ZSTD_e_flush; /* can't end now : not all input consumed */ + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} + + +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); + + /* recommended next input size : fill current input buffer */ + return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ +} + + +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame) +{ + size_t const srcSize = mtctx->inBuff.filled; + DEBUGLOG(5, "ZSTDMT_flushStream_internal"); + + if ( mtctx->jobReady /* one job ready for a worker to pick up */ + || (srcSize > 0) /* still some data within input buffer */ + || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ + DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", + (U32)srcSize, (U32)endFrame); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); + } + + /* check if there is any data available to flush */ + return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame); +} + + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(5, "ZSTDMT_flushStream"); + if (mtctx->singleBlockingThread) + return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush); +} + +size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(4, "ZSTDMT_endStream"); + if (mtctx->singleBlockingThread) + return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end); +} diff --git a/Perl/shared/zstd/compress/zstdmt_compress.h b/Perl/shared/zstd/compress/zstdmt_compress.h new file mode 100644 index 000000000..12e6bcb3a --- /dev/null +++ b/Perl/shared/zstd/compress/zstdmt_compress.h @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2() + * instead. + * + * If you depend on these APIs and can't switch, then define + * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library. + * However, we may completely remove these functions in a future + * release, so please switch soon. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +#ifdef ZSTD_LEGACY_MULTITHREADED_API +# define ZSTDMT_API ZSTDLIB_API +#else +# define ZSTDMT_API +#endif + +/* === Dependencies === */ +#include /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX +# define ZSTDMT_NBWORKERS_MAX 200 +#endif +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1 MB) +#endif +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem); +ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + + +/* === Simple one-pass compression function === */ + +ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + + + +/* === Streaming functions === */ + +ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ + +ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); +ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ + + +/* === Advanced functions and parameters === */ + +ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + int overlapLog); + +ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, + unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ + +ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fparams, + unsigned long long pledgedSrcSize); /* note : zero means empty */ + +/* ZSTDMT_parameter : + * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ +typedef enum { + ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ + ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ + ZSTDMT_p_rsyncable /* Enables rsyncable mode. */ +} ZSTDMT_parameter; + +/* ZSTDMT_setMTCtxParameter() : + * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. + * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ + * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); + +/* ZSTDMT_getMTCtxParameter() : + * Query the ZSTDMT_CCtx for a parameter value. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); + + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_CCtxParam_setMTCtxParameter() + * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */ +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value); + +/*! ZSTDMT_CCtxParam_setNbWorkers() + * Set nbWorkers, and clamp it. + * Also reset jobSize and overlapLog */ +size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +/*! ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ diff --git a/Perl/shared/zstd/decompress/huf_decompress.c b/Perl/shared/zstd/decompress/huf_decompress.c index a342dfb1e..3f8bd2973 100644 --- a/Perl/shared/zstd/decompress/huf_decompress.c +++ b/Perl/shared/zstd/decompress/huf_decompress.c @@ -1,6 +1,7 @@ /* ****************************************************************** - Huffman decoder, part of New Generation Entropy library - Copyright (C) 2013-2016, Yann Collet. + huff0 huffman decoder, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -29,45 +30,95 @@ You can contact the author at : - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -/* inline is defined */ -#elif defined(_MSC_VER) || defined(__GNUC__) -# define inline __inline -#else -# define inline /* disable inline */ -#endif - -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - /* ************************************************************** * Dependencies ****************************************************************/ #include /* memcpy, memset */ +#include "compiler.h" #include "bitstream.h" /* BIT_* */ -#include "fse.h" /* header compression */ +#include "fse.h" /* to compress headers */ #define HUF_STATIC_LINKING_ONLY #include "huf.h" +#include "error_private.h" + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif /* ************************************************************** * Error Management ****************************************************************/ -#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define HUF_isError ERR_isError +#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif /*-***************************/ /* generic DTableDesc */ /*-***************************/ - typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) @@ -78,23 +129,33 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) } +#ifndef HUF_FORCE_DECOMPRESS_X2 + /*-***************************/ /* single-symbol decoding */ /*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ - -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; - U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; U32 nbSymbols = 0; size_t iSize; void* const dtPtr = DTable + 1; - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + + U32* rankVal; + BYTE* huffWeight; + size_t spaceUsed32 = 0; - HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + rankVal = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; + huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); @@ -102,16 +163,16 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) /* Table header */ { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, huffman tree cannot fit in */ + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; memcpy(DTable, &dtd, sizeof(dtd)); } - /* Prepare ranks */ + /* Calculate starting value for each rank */ { U32 n, nextRankStart = 0; for (n=1; n> 1; - U32 i; - HUF_DEltX2 D; + U32 u; + HUF_DEltX1 D; D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (i = rankVal[w]; i < rankVal[w] + length; i++) - dt[i] = D; + for (u = rankVal[w]; u < rankVal[w] + length; u++) + dt[u] = D; rankVal[w] += length; } } return iSize; } +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} -static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ BYTE const c = dt[val].byte; @@ -141,41 +209,44 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con return c; } -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) { BYTE* const pStart = p; /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); } - /* closer to the end */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - /* no more data to retrieve from bitstream, hence no need to reload */ + /* no more data to retrieve from bitstream, no need to reload */ while (p < pEnd) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); return pEnd-pStart; } -static size_t HUF_decompress1X2_usingDTable_internal( +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -183,52 +254,22 @@ static size_t HUF_decompress1X2_usingDTable_internal( BYTE* op = (BYTE*)dst; BYTE* const oend = op + dstSize; const void* dtPtr = DTable + 1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; BIT_DStream_t bitD; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; - { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) return errorCode; } + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); - /* check */ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); return dstSize; } -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - - -static size_t HUF_decompress4X2_usingDTable_internal( +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -240,7 +281,7 @@ static size_t HUF_decompress4X2_usingDTable_internal( BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable + 1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; /* Init */ BIT_DStream_t bitD1; @@ -263,57 +304,58 @@ static size_t HUF_decompress4X2_usingDTable_internal( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal; + U32 endSignal = BIT_DStream_unfinished; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; } + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - /* 16-32 symbols per loop (4-8 symbols per stream) */ + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + BIT_reloadDStream(&bitD1); + BIT_reloadDStream(&bitD2); + BIT_reloadDStream(&bitD3); + BIT_reloadDStream(&bitD4); } /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ if (op1 > opStart2) return ERROR(corruption_detected); if (op2 > opStart3) return ERROR(corruption_detected); if (op3 > opStart4) return ERROR(corruption_detected); /* note : op4 supposed already verified within main loop */ /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } /* decoded size */ return dstSize; @@ -321,51 +363,123 @@ static size_t HUF_decompress4X2_usingDTable_internal( } -size_t HUF_decompress4X2_usingDTable( +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; -size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); + size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, + workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + +#ifndef HUF_FORCE_DECOMPRESS_X1 /* *************************/ /* double-symbols decoding */ /* *************************/ -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; -/* HUF_fillDTableX4Level2() : + +/* HUF_fillDTableX2Level2() : * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, const U32* rankValOrigin, const int minWeight, const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) { - HUF_DEltX4 DElt; + HUF_DEltX2 DElt; U32 rankVal[HUF_TABLELOG_MAX + 1]; /* get pre-calculated rankVal */ @@ -400,9 +514,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co } } } -typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; -static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) @@ -427,12 +540,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, int minWeight = nbBits + scaleLog; if (minWeight < 1) minWeight = 1; sortedRank = rankStart[minWeight]; - HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList+sortedRank, sortedListSize-sortedRank, nbBitsBaseline, symbol); } else { - HUF_DEltX4 DElt; + HUF_DEltX2 DElt; MEM_writeLE16(&(DElt.sequence), symbol); DElt.nbBits = (BYTE)(nbBits); DElt.length = 1; @@ -444,22 +557,42 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, } } -size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) { - BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; - sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; - U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 }; - U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 }; - U32* const rankStart = rankStart0+1; - rankVal_t rankVal; U32 tableLog, maxW, sizeOfSort, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); U32 const maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; - - HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + rankStart = rankStart0 + 1; + memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ @@ -513,7 +646,7 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) rankValPtr[w] = rankVal0[w] >> consumed; } } } } - HUF_fillDTableX4(dt, maxTableLog, + HUF_fillDTableX2(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog+1); @@ -524,8 +657,16 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) return iSize; } +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} -static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ memcpy(op, dt+val, 2); @@ -533,7 +674,8 @@ static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4 return dt[val].length; } -static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ memcpy(op, dt+val, 1); @@ -542,50 +684,52 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { BIT_skipBits(DStream, dt[val].nbBits); if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); } } return 1; } +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) { BYTE* const pStart = p; /* up to 8 symbols at a time */ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_1(p, bitDPtr); - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); } /* closer to end : up to 2 symbols at a time */ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); while (p <= pEnd-2) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ if (p < pEnd) - p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); return p-pStart; } - -static size_t HUF_decompress1X4_usingDTable_internal( +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -593,17 +737,15 @@ static size_t HUF_decompress1X4_usingDTable_internal( BIT_DStream_t bitD; /* Init */ - { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) return errorCode; - } + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); /* decode */ { BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); } /* check */ @@ -613,35 +755,9 @@ static size_t HUF_decompress1X4_usingDTable_internal( return dstSize; } -size_t HUF_decompress1X4_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); -} - -size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -static size_t HUF_decompress4X4_usingDTable_internal( +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -652,7 +768,7 @@ static size_t HUF_decompress4X4_usingDTable_internal( BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable+1; - const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; /* Init */ BIT_DStream_t bitD1; @@ -680,34 +796,30 @@ static size_t HUF_decompress4X4_usingDTable_internal( U32 const dtLog = dtd.tableLog; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; } - { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; } + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_1(op1, &bitD1); - HUF_DECODE_SYMBOLX4_1(op2, &bitD2); - HUF_DECODE_SYMBOLX4_1(op3, &bitD3); - HUF_DECODE_SYMBOLX4_1(op4, &bitD4); - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_0(op1, &bitD1); - HUF_DECODE_SYMBOLX4_0(op2, &bitD2); - HUF_DECODE_SYMBOLX4_0(op3, &bitD3); - HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); } @@ -719,10 +831,10 @@ static size_t HUF_decompress4X4_usingDTable_internal( /* note : op4 already verified within main loop */ /* finish bitStreams one by one */ - HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); /* check */ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); @@ -733,48 +845,120 @@ static size_t HUF_decompress4X4_usingDTable_internal( } } +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -size_t HUF_decompress4X4_usingDTable( +size_t HUF_decompress1X2_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} -size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; - size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize); + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } -size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); } -/* ********************************/ -/* Generic decompression selector */ -/* ********************************/ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : - HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, @@ -782,11 +966,22 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : - HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = { @@ -808,22 +1003,35 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ }; +#endif /** HUF_selectDecoder() : -* Tells which decoder is likely to decode faster, -* based on a set of pre-determined metrics. -* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else /* decoder timing evaluation */ - U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ - - return DTime1 < DTime0; + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif } @@ -831,7 +1039,9 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -840,7 +1050,17 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif } } @@ -853,24 +1073,58 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif } } -size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) { /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); - if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } } -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -879,7 +1133,100 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif } } diff --git a/Perl/shared/zstd/decompress/zstd_ddict.c b/Perl/shared/zstd/decompress/zstd_ddict.c new file mode 100644 index 000000000..0af3d23bf --- /dev/null +++ b/Perl/shared/zstd/decompress/zstd_ddict.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ +#include "cpu.h" /* bmi2 */ +#include "mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_decompress_internal.h" +#include "zstd_ddict.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "zstd_legacy.h" +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_free(ddict->dictBuffer, cMem); + ZSTD_free(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} diff --git a/Perl/shared/zstd/decompress/zstd_ddict.h b/Perl/shared/zstd/decompress/zstd_ddict.h new file mode 100644 index 000000000..0479d11bb --- /dev/null +++ b/Perl/shared/zstd/decompress/zstd_ddict.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include /* size_t */ +#include "zstd.h" /* ZSTD_DDict, and several public functions */ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ diff --git a/Perl/shared/zstd/decompress/zstd_decompress.c b/Perl/shared/zstd/decompress/zstd_decompress.c index 085477bf4..675596f5a 100644 --- a/Perl/shared/zstd/decompress/zstd_decompress.c +++ b/Perl/shared/zstd/decompress/zstd_decompress.c @@ -1,10 +1,11 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ @@ -13,8 +14,9 @@ *****************************************************************/ /*! * HEAPMODE : - * Select how default decompression function ZSTD_decompress() will allocate memory, - * in memory stack (0), or in memory heap (1, requires malloc()) + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. */ #ifndef ZSTD_HEAPMODE # define ZSTD_HEAPMODE 1 @@ -22,19 +24,31 @@ /*! * LEGACY_SUPPORT : -* if set to 1, ZSTD_decompress() can decode older formats (v0.1+) +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) */ #ifndef ZSTD_LEGACY_SUPPORT # define ZSTD_LEGACY_SUPPORT 0 #endif /*! -* MAXWINDOWSIZE_DEFAULT : -* maximum window size accepted by DStream, by default. -* Frames requiring more memory will be rejected. -*/ + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -# define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 #endif @@ -42,164 +56,134 @@ * Dependencies *********************************************************/ #include /* memcpy, memmove, memset */ +#include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ -#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ -#include "xxhash.h" /* XXH64_* */ #define FSE_STATIC_LINKING_ONLY #include "fse.h" #define HUF_STATIC_LINKING_ONLY #include "huf.h" -#include "zstd_internal.h" +#include "zstd_internal.h" /* blockProperties_t */ +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) # include "zstd_legacy.h" #endif -#if defined(_MSC_VER) -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define ZSTD_PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) -# define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) -#else -# define ZSTD_PREFETCH(ptr) /* disabled */ -#endif - -/*-************************************* -* Macros -***************************************/ -#define ZSTD_isError ERR_isError /* for inlining */ -#define FSE_isError ERR_isError -#define HUF_isError ERR_isError - - -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } - - /*-************************************************************* * Context management ***************************************************************/ -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; - -struct ZSTD_DCtx_s -{ - const FSE_DTable* LLTptr; - const FSE_DTable* MLTptr; - const FSE_DTable* OFTptr; - const HUF_DTable* HUFptr; - FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; - FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; - FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - const void* previousDstEnd; - const void* base; - const void* vBase; - const void* dictEnd; - size_t expected; - U32 rep[ZSTD_REP_NUM]; - ZSTD_frameParams fParams; - blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - U32 dictID; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - -size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { return (dctx==NULL) ? 0 : sizeof(ZSTD_DCtx); } +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) { - dctx->expected = ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; - dctx->dictEnd = NULL; - dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - MEM_STATIC_ASSERT(sizeof(dctx->rep) == sizeof(repStartValue)); - memcpy(dctx->rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->LLTable; - dctx->MLTptr = dctx->MLTable; - dctx->OFTptr = dctx->OFTable; - dctx->HUFptr = dctx->hufTable; - return 0; + size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE : + ZSTD_FRAMEHEADERSIZE_PREFIX; + ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; } -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { - ZSTD_DCtx* dctx; + dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ + dctx->staticSize = 0; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); +} - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ - dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); - if (!dctx) return NULL; - memcpy(&dctx->customMem, &customMem, sizeof(customMem)); - ZSTD_decompressBegin(dctx); + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); return dctx; } +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + ZSTD_DCtx* ZSTD_createDCtx(void) { - return ZSTD_createDCtx_advanced(defaultCustomMem); + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; } size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) { if (dctx==NULL) return 0; /* support free on NULL */ - ZSTD_free(dctx, dctx->customMem); - return 0; /* reserved as a potential error code in the future */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_free(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + ZSTD_free(dctx, cMem); + return 0; + } } +/* no longer useful */ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { - size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; - memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ -} - -static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) -{ - ZSTD_decompressBegin(dstDCtx); /* init */ - if (srcDCtx) { /* support refDCtx on NULL */ - dstDCtx->dictEnd = srcDCtx->dictEnd; - dstDCtx->vBase = srcDCtx->vBase; - dstDCtx->base = srcDCtx->base; - dstDCtx->previousDstEnd = srcDCtx->previousDstEnd; - dstDCtx->dictID = srcDCtx->dictID; - dstDCtx->litEntropy = srcDCtx->litEntropy; - dstDCtx->fseEntropy = srcDCtx->fseEntropy; - dstDCtx->LLTptr = srcDCtx->LLTable; - dstDCtx->MLTptr = srcDCtx->MLTable; - dstDCtx->OFTptr = srcDCtx->OFTable; - dstDCtx->HUFptr = srcDCtx->hufTable; - dstDCtx->rep[0] = srcDCtx->rep[0]; - dstDCtx->rep[1] = srcDCtx->rep[1]; - dstDCtx->rep[2] = srcDCtx->rep[2]; - } + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } /*-************************************************************* -* Decompression section -***************************************************************/ + * Frame header decoding + ***************************************************************/ /*! ZSTD_isFrame() : * Tells if the content of `buffer` starts with a valid Frame Identifier. @@ -208,10 +192,10 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) * Note 3 : Skippable Frame Identifiers are considered valid. */ unsigned ZSTD_isFrame(const void* buffer, size_t size) { - if (size < 4) return 0; + if (size < ZSTD_FRAMEIDSIZE) return 0; { U32 const magic = MEM_readLE32(buffer); if (magic == ZSTD_MAGICNUMBER) return 1; - if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; } #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(buffer, size)) return 1; @@ -219,70 +203,93 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) return 0; } - -/** ZSTD_frameHeaderSize() : -* srcSize must be >= ZSTD_frameHeaderSize_prefix. -* @return : size of the Frame Header */ -static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { - if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); - { BYTE const fhd = ((const BYTE*)src)[4]; + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; U32 const singleSegment = (fhd >> 5) & 1; U32 const fcsId = fhd >> 6; - return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] - + (singleSegment && !fcsId); + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); } } +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + -/** ZSTD_getFrameParams() : -* decode Frame Header, or require larger `srcSize`. -* @return : 0, `fparamsPtr` is correctly filled, -* >0, `srcSize` is too small, result is expected `srcSize`, -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize) +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) { const BYTE* ip = (const BYTE*)src; - - if (srcSize < ZSTD_frameHeaderSize_prefix) return ZSTD_frameHeaderSize_prefix; - if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { - if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ - memset(fparamsPtr, 0, sizeof(*fparamsPtr)); - fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4); - fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ + size_t const minInputSize = ZSTD_startingInputLength(format); + + memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - return ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown); } /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); - if (srcSize < fhsize) return fhsize; } + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } - { BYTE const fhdByte = ip[4]; - size_t pos = 5; + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; U32 const dictIDSizeCode = fhdByte&3; U32 const checksumFlag = (fhdByte>>2)&1; U32 const singleSegment = (fhdByte>>5)&1; U32 const fcsID = fhdByte>>6; - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; - U32 windowSize = 0; + U64 windowSize = 0; U32 dictID = 0; - U64 frameContentSize = 0; - if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ - windowSize = (1U << windowLog); + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); + windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } - switch(dictIDSizeCode) { - default: /* impossible */ + default: assert(0); /* impossible */ case 0 : break; case 1 : dictID = ip[pos]; pos++; break; case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; @@ -290,1065 +297,281 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t } switch(fcsID) { - default: /* impossible */ + default: assert(0); /* impossible */ case 0 : if (singleSegment) frameContentSize = ip[pos]; break; case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; case 2 : frameContentSize = MEM_readLE32(ip+pos); break; case 3 : frameContentSize = MEM_readLE64(ip+pos); break; } - if (!windowSize) windowSize = (U32)frameContentSize; - if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge); - fparamsPtr->frameContentSize = frameContentSize; - fparamsPtr->windowSize = windowSize; - fparamsPtr->dictID = dictID; - fparamsPtr->checksumFlag = checksumFlag; + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; } return 0; } - -/** ZSTD_getDecompressedSize() : -* compatible with legacy mode -* @return : decompressed size if known, 0 otherwise - note : 0 can mean any of the following : - - decompressed size is not present within frame header - - frame header unknown / not supported - - frame header not complete (`srcSize` too small) */ -unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) -{ -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getDecompressedSize_legacy(src, srcSize); -#endif - { ZSTD_frameParams fparams; - size_t const frResult = ZSTD_getFrameParams(&fparams, src, srcSize); - if (frResult!=0) return 0; - return fparams.frameContentSize; - } -} - - -/** ZSTD_decodeFrameHeader() : -* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). -* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) { - size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); - if (ZSTD_isError(result)) return result; /* invalid header */ - if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ - if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); - return 0; + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); } -typedef struct -{ - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; - -/*! ZSTD_getcBlockSize() : -* Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) { - if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - { U32 const cBlockHeader = MEM_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) return 1; - if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); - return cSize; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } } - -static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static size_t readSkippableFrameSize(void const* src, size_t srcSize) { - if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); - return srcSize; -} + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); -static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize) -{ - if (srcSize != 1) return ERROR(srcSize_wrong); - if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); - memset(dst, *(const BYTE*)src, regenSize); - return regenSize; + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported); + + return skippableHeaderSize + sizeU32; } -/*! ZSTD_decodeLiteralsBlock() : - @return : nb of bytes read from src (< srcSize ) */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) { - if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); - - { const BYTE* const istart = (const BYTE*) src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - - switch(litEncType) - { - case set_repeat: - if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); - /* fall-through */ - case set_compressed: - if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + (istart[4] << 10); - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); - if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); - - if (HUF_isError((litEncType==set_repeat) ? - ( singleStream ? - HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) : - HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) : - ( singleStream ? - HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) : - HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) )) - return ERROR(corruption_detected); - - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType==set_compressed) dctx->HUFptr = dctx->hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } + unsigned long long totalDstSize = 0; - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - break; - } + while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { + U32 const magicNumber = MEM_readLE32(src); - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - if (litSize+lhSize > srcSize) return ERROR(corruption_detected); - memcpy(dctx->litBuffer, istart+lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize+litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart+lhSize; - dctx->litSize = litSize; - return lhSize+litSize; + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) + return skippableSize; + if (srcSize < skippableSize) { + return ZSTD_CONTENTSIZE_ERROR; } - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize+1; - } - default: - return ERROR(corruption_detected); /* impossible */ + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; } - } -} - - -typedef union { - FSE_decode_t realData; - U32 alignedBy4; -} FSE_decode_t4; - -static const FSE_decode_t4 LL_defaultDTable[(1< max) return ERROR(corruption_detected); - FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src); - *DTablePtr = DTableSpace; - return 1; - case set_basic : - *DTablePtr = (const FSE_DTable*)tmpPtr; - return 0; - case set_repeat: - if (!flagRepeatTable) return ERROR(corruption_detected); - return 0; - default : /* impossible */ - case set_compressed : - { U32 tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) return ERROR(corruption_detected); - if (tableLog > maxLog) return ERROR(corruption_detected); - FSE_buildDTable(DTableSpace, norm, max, tableLog); - *DTablePtr = DTableSpace; - return headerSize; - } } -} -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* const iend = istart + srcSize; - const BYTE* ip = istart; + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; - /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); - - /* SeqHead */ - { int nbSeq = *ip++; - if (!nbSeq) { *nbSeqPtr=0; return 1; } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - if (ip+2 > iend) return ERROR(srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - if (ip >= iend) return ERROR(srcSize_wrong); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; } - *nbSeqPtr = nbSeq; - } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } - /* FSE table descriptors */ - if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ - { size_t const llhSize = ZSTD_buildSeqTable(dctx->LLTable, &dctx->LLTptr, - LLtype, MaxLL, LLFSELog, - ip, iend-ip, LL_defaultDTable, dctx->fseEntropy); - if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); - ip += llhSize; + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; } - { size_t const ofhSize = ZSTD_buildSeqTable(dctx->OFTable, &dctx->OFTptr, - OFtype, MaxOff, OffFSELog, - ip, iend-ip, OF_defaultDTable, dctx->fseEntropy); - if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); - ip += ofhSize; - } - { size_t const mlhSize = ZSTD_buildSeqTable(dctx->MLTable, &dctx->MLTptr, - MLtype, MaxML, MLFSELog, - ip, iend-ip, ML_defaultDTable, dctx->fseEntropy); - if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); - ip += mlhSize; - } - } - - return ip-istart; -} - - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE* match; -} seq_t; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ -typedef struct { - BIT_DStream_t DStream; - FSE_DState_t stateLL; - FSE_DState_t stateOffb; - FSE_DState_t stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* base; - size_t pos; - iPtrDiff gotoDict; -} seqState_t; + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; - -FORCE_NOINLINE -size_t ZSTD_execSequenceLast7(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ - - /* copy literals */ - if (op < oend_w) { - ZSTD_wildcopy(op, *litPtr, oend_w - op); - *litPtr += oend_w - op; - op = oend_w; - } - while (op < oLitEnd) *op++ = *(*litPtr)++; - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); - match = dictEnd - (base-match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - } } - while (op < oMatchEnd) *op++ = *match++; - return sequenceLength; + return totalDstSize; } - - - -static seq_t ZSTD_decodeSequence(seqState_t* seqState) +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) { - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits+mlBits+ofBits; - - static const U32 LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; - - static const U32 ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; - - static const U32 OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; - - /* sequence */ - { size_t offset; - if (!ofCode) - offset = 0; - else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - - if (ofCode <= 1) { - offset += (llCode==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || - (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); - - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; } -FORCE_INLINE -size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); - - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); - match += (dictEnd-base); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } - } } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ - } - return sequenceLength; + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong); +#endif + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return 0; } - -static size_t ZSTD_decompressSequences( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize) +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - int nbSeq; - - /* Build Decoding Tables */ - { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; - { U32 i; for (i=0; irep[i]; } - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { - nbSeq--; - { seq_t const sequence = ZSTD_decodeSequence(&seqState); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } - - /* check if reached exact end */ - if (nbSeq) return ERROR(corruption_detected); - /* save reps for next block */ - { U32 i; for (i=0; irep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op-ostart; + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; } - -static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) { - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits+mlBits+ofBits; - - static const U32 LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; + ZSTD_frameSizeInfo frameSizeInfo; + memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - static const U32 ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; - - static const U32 OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; - - /* sequence */ - { size_t offset; - if (!ofCode) - offset = 0; - else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif - if (ofCode <= 1) { - offset += (llCode==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || - (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); - - { size_t const pos = seqState->pos + seq.litLength; - seq.match = seqState->base + pos - seq.offset; /* single memory segment */ - if (seq.offset > pos) seq.match += seqState->gotoDict; /* separate memory segment */ - seqState->pos = pos + seq.matchLength; - } - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; - return seq; -} + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); -FORCE_INLINE -size_t ZSTD_execSequenceLong(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = sequence.match; + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - /* check */ -#if 1 - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); -#endif + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ -#if 1 - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; + if (blockProperties.lastBlock) break; } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } - } } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ -#endif - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + + frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; } - return sequenceLength; } -static size_t ZSTD_decompressSequencesLong( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize) +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - int nbSeq; - - /* Build Decoding Tables */ - { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STOSEQ_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { U32 i; for (i=0; irep[i]; } - seqState.base = base; - seqState.pos = (size_t)(op-base); - seqState.gotoDict = (iPtrDiff)(dictEnd - base); - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNbrep[i] = (U32)(seqState.prevOffset[i]); } - } - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; } - - return op-ostart; + return bound; } -static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; - - if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong); - - /* Decode literals sub-block */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); -} +/*-************************************************************* + * Frame decoding + ***************************************************************/ -static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) { if (dst != dctx->previousDstEnd) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dst; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; dctx->previousDstEnd = dst; } } -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; -} - - /** ZSTD_insertBlock() : insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { ZSTD_checkContinuity(dctx, blockStart); dctx->previousDstEnd = (const char*)blockStart + blockSize; @@ -1356,117 +579,256 @@ ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, siz } -size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) { - if (length > dstCapacity) return ERROR(dstSize_tooSmall); - memset(dst, byte, length); - return length; + DEBUGLOG(5, "ZSTD_copyRawBlock"); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null); + } + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null); + } + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); + memset(dst, b, regenSize); + return regenSize; } /*! ZSTD_decompressFrame() : -* `dctx` must be properly initialized */ + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)src; + const BYTE* ip = (const BYTE*)(*srcPtr); BYTE* const ostart = (BYTE* const)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t remainingSize = srcSize; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); /* check */ - if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, + srcSize_wrong); /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - CHECK_F(ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)); - ip += frameHeaderSize; remainingSize -= frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } /* Loop on each block */ while (1) { size_t decodedSize; blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); if (ZSTD_isError(cBlockSize)) return cBlockSize; ip += ZSTD_blockHeaderSize; - remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); break; case bt_raw : decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); break; case bt_reserved : default: - return ERROR(corruption_detected); + RETURN_ERROR(corruption_detected); } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); + if (dctx->fParams.checksumFlag) + XXH64_update(&dctx->xxhState, op, decodedSize); op += decodedSize; ip += cBlockSize; - remainingSize -= cBlockSize; + remainingSrcSize -= cBlockSize; if (blockProperties.lastBlock) break; } - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); U32 checkRead; - if (remainingSize<4) return ERROR(checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); checkRead = MEM_readLE32(ip); - if (checkRead != checkCalc) return ERROR(checksum_wrong); - remainingSize -= 4; + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); + ip += 4; + remainingSrcSize -= 4; } - if (remainingSize) return ERROR(srcSize_wrong); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; return op-ostart; } +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + + assert(decodedSize <=- dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) + return skippableSize; + RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + } + ZSTD_checkContinuity(dctx, dst); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "at least one frame successfully completed, but following " + "bytes are garbage: it's more likely to be a srcSize error, " + "specifying more bytes than compressed size of frame(s). This " + "error message replaces ERROR(prefix_unknown), which would be " + "confusing, as the first header is actually correct. Note that " + "one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic " + "bytes. But this is _much_ less likely than a srcSize field " + "error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (BYTE*)dst - (BYTE*)dststart; +} size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize); -#endif - ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); - ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); } +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); } size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { -#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - if (dctx==NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; #else /* stack mode */ ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); #endif } @@ -1482,6 +844,7 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { switch(dctx->stage) { default: /* should not happen */ + assert(0); case ZSTDds_getFrameHeaderSize: case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader; @@ -1499,40 +862,42 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { } } -int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } /** ZSTD_decompressContinue() : -* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) -* or an error code, which can be tested using ZSTD_isError() */ + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ - if (srcSize != dctx->expected) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) { case ZSTDds_getFrameHeaderSize : - if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); /* impossible */ - if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } - dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { - dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - } - dctx->expected = 0; /* not necessary to copy more */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; case ZSTDds_decodeFrameHeader: - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + assert(src != NULL); + memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -1558,33 +923,43 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c dctx->stage = ZSTDds_getFrameHeaderSize; } } else { - dctx->expected = 3; /* go directly to next header */ + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ dctx->stage = ZSTDds_decodeBlockHeader; } return 0; } + case ZSTDds_decompressLastBlock: case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); { size_t rSize; switch(dctx->bType) { case bt_compressed: - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); break; case bt_raw : rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; case bt_rle : - rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); break; case bt_reserved : /* should never happen */ default: - return ERROR(corruption_detected); + RETURN_ERROR(corruption_detected); } if (ZSTD_isError(rSize)) return rSize; + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; @@ -1599,27 +974,34 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } return rSize; } + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ - if (check32 != h32) return ERROR(checksum_wrong); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; } + case ZSTDds_decodeSkippableHeader: - { memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); - dctx->stage = ZSTDds_skipFrame; - return 0; - } + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + case ZSTDds_skipFrame: - { dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + default: - return ERROR(GENERIC); /* impossible */ + assert(0); /* impossible */ + RETURN_ERROR(GENERIC); /* some compiler require default to do something */ } } @@ -1627,56 +1009,94 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dict; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; return 0; } -static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize) +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - { size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize); - if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; - U32 offcodeMaxValue=MaxOff, offcodeLog; + unsigned offcodeMaxValue = MaxOff, offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog); dictPtr += litlengthHeaderSize; } - if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); - dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); - dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); - dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); - dictPtr += 12; + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, + dictionary_corrupted); + entropy->rep[i] = rep; + } } - dctx->litEntropy = dctx->fseEntropy = 1; return dictPtr - (const BYTE*)dict; } @@ -1684,98 +1104,75 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict { if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); { U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { + if (magic != ZSTD_MAGIC_DICTIONARY) { return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ } } - dctx->dictID = MEM_readLE32((const char*)dict + 4); + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); /* load entropy tables */ - dict = (const char*)dict + 8; - dictSize -= 8; - { size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize); - if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); dict = (const char*)dict + eSize; dictSize -= eSize; } + dctx->litEntropy = dctx->fseEntropy = 1; /* reference dictionary content */ return ZSTD_refDictContent(dctx, dict, dictSize); } +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - CHECK_F(ZSTD_decompressBegin(dctx)); - if (dict && dictSize) CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted); return 0; } /* ====== ZSTD_DDict ====== */ -struct ZSTD_DDict_s { - void* dict; - size_t dictSize; - ZSTD_DCtx* refContext; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ - -ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem) +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; - - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - void* const dictContent = ZSTD_malloc(dictSize, customMem); - ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem); - - if (!dictContent || !ddict || !dctx) { - ZSTD_free(dictContent, customMem); - ZSTD_free(ddict, customMem); - ZSTD_free(dctx, customMem); - return NULL; - } - - if (dictSize) { - memcpy(dictContent, dict, dictSize); - } - { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize); - if (ZSTD_isError(errorCode)) { - ZSTD_free(dictContent, customMem); - ZSTD_free(ddict, customMem); - ZSTD_free(dctx, customMem); - return NULL; - } } - - ddict->dict = dictContent; - ddict->dictSize = dictSize; - ddict->refContext = dctx; - return ddict; + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); } -} - -/*! ZSTD_createDDict() : -* Create a digested dictionary, ready to start decompression without startup delay. -* `dict` can be released after `ZSTD_DDict` creation */ -ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dict, dictSize, allocator); -} - -size_t ZSTD_freeDDict(ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = ddict->refContext->customMem; - ZSTD_freeDCtx(ddict->refContext); - ZSTD_free(ddict->dict, cMem); - ZSTD_free(ddict, cMem); - return 0; + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); } -} - -size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize; + return 0; } /*! ZSTD_getDictID_fromDict() : @@ -1785,34 +1182,27 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { if (dictSize < 8) return 0; - if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return 0; - return MEM_readLE32((const char*)dict + 4); -} - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; - return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize); + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); } /*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. + * Provides the dictID required to decompress frame stored within `src`. * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) { - ZSTD_frameParams zfp = { 0 , 0 , 0 , 0 }; - size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); if (ZSTD_isError(hError)) return 0; return zfp.dictID; } @@ -1826,12 +1216,10 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, const void* src, size_t srcSize, const ZSTD_DDict* ddict) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize); -#endif - ZSTD_refDCtx(dctx, ddict->refContext); - ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); } @@ -1839,133 +1227,246 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, * Streaming decompression *====================================*/ -typedef enum { zdss_init, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; - -/* *** Resource management *** */ -struct ZSTD_DStream_s { - ZSTD_DCtx* dctx; - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; - ZSTD_frameParams fParams; - ZSTD_dStreamStage stage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t blockSize; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ - size_t lhSize; - ZSTD_customMem customMem; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; -}; /* typedef'd to ZSTD_DStream within "zstd.h" */ - - ZSTD_DStream* ZSTD_createDStream(void) { - return ZSTD_createDStream_advanced(defaultCustomMem); + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); } -ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) { - ZSTD_DStream* zds; - - if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; - if (!customMem.customAlloc || !customMem.customFree) return NULL; + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} - zds = (ZSTD_DStream*) ZSTD_malloc(sizeof(ZSTD_DStream), customMem); - if (zds==NULL) return NULL; - memset(zds, 0, sizeof(ZSTD_DStream)); - memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); - zds->dctx = ZSTD_createDCtx_advanced(customMem); - if (zds->dctx == NULL) { ZSTD_freeDStream(zds); return NULL; } - zds->stage = zdss_init; - zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - return zds; +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); } size_t ZSTD_freeDStream(ZSTD_DStream* zds) { - if (zds==NULL) return 0; /* support free on null */ - { ZSTD_customMem const cMem = zds->customMem; - ZSTD_freeDCtx(zds->dctx); - ZSTD_freeDDict(zds->ddictLocal); - ZSTD_free(zds->inBuff, cMem); - ZSTD_free(zds->outBuff, cMem); -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (zds->legacyContext) - ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion); -#endif - ZSTD_free(zds, cMem); - return 0; + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + ZSTD_clearDict(dctx); + if (dict && dictSize >= 8) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); } +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); + dctx->dictUses = ZSTD_use_once; + return 0; +} -/* *** Initialization *** */ +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. + * this function cannot fail */ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - ZSTD_freeDDict(zds->ddictLocal); - if (dict) { - zds->ddictLocal = ZSTD_createDDict(dict, dictSize); - if (zds->ddictLocal == NULL) return ERROR(memory_allocation); - } else zds->ddictLocal = NULL; - zds->ddict = zds->ddictLocal; - zds->legacyVersion = 0; - zds->hostageByte = 0; - return ZSTD_frameHeaderSize_prefix; + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + return ZSTD_FRAMEHEADERSIZE_PREFIX; } +/* note : this variant can't fail */ size_t ZSTD_initDStream(ZSTD_DStream* zds) { - return ZSTD_initDStream_usingDict(zds, NULL, 0); + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); } -size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict) /**< note : ddict will just be referenced, and must outlive decompression session */ +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { - size_t const initResult = ZSTD_initDStream(zds); - zds->ddict = ddict; - return initResult; + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); + return ZSTD_FRAMEHEADERSIZE_PREFIX; } -size_t ZSTD_resetDStream(ZSTD_DStream* zds) +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - return ZSTD_frameHeaderSize_prefix; + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); + return ZSTD_FRAMEHEADERSIZE_PREFIX; } -size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, - ZSTD_DStreamParameter_e paramType, unsigned paramValue) + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - switch(paramType) - { - default : return ERROR(parameter_unknown); - case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; } return 0; } +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported); +} -size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) { - if (zds==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddictLocal) + zds->inBuffSize + zds->outBuffSize; + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + ZSTD_clearDict(dctx); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } @@ -1989,126 +1490,201 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB char* op = ostart; U32 someMoreWork = 1; -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - if (zds->legacyVersion) - return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); -#endif + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); while (someMoreWork) { - switch(zds->stage) + switch(zds->streamStage) { case zdss_init : - ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; /* fall-through */ case zdss_loadHeader : - { size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); - if (ZSTD_isError(hSize)) + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); if (legacyVersion) { - const void* const dict = zds->ddict ? zds->ddict->dict : NULL; - size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; - CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - dict, dictSize)); + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize)); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; - return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); - } else { - return hSize; /* error */ - } } -#else - return hSize; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } #endif + return hSize; /* error */ + } if (hSize != 0) { /* need more input */ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip); - zds->lhSize += iend-ip; + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } input->pos = input->size; - return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ } + assert(ip != NULL); memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; break; } } - /* Consume header */ - { const ZSTD_DCtx* refContext = zds->ddict ? zds->ddict->refContext : NULL; - ZSTD_refDCtx(zds->dctx, refContext); - } - { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); - { size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer+h1Size, h2Size)); + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; } } + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge); /* Adapt buffer sizes to frame header instructions */ - { size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->fParams.windowSize + blockSize; - zds->blockSize = blockSize; - if (zds->inBuffSize < blockSize) { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = blockSize; - zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem); - if (zds->inBuff == NULL) return ERROR(memory_allocation); - } - if (zds->outBuffSize < neededOutSize) { - ZSTD_free(zds->outBuff, zds->customMem); - zds->outBuffSize = neededOutSize; - zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem); - if (zds->outBuff == NULL) return ERROR(memory_allocation); + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); + if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation); + } else { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; } } - zds->stage = zdss_read; - /* pass-through */ + zds->streamStage = zdss_read; + /* fall-through */ case zdss_read: - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ - zds->stage = zdss_init; + zds->streamStage = zdss_init; someMoreWork = 0; break; } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t const decodedSize = ZSTD_decompressContinue(zds, zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize); if (ZSTD_isError(decodedSize)) return decodedSize; ip += neededInSize; if (!decodedSize && !isSkipFrame) break; /* this was just a header */ zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; + zds->streamStage = zdss_flush; break; - } - if (ip==iend) { someMoreWork = 0; break; } /* no more input */ - zds->stage = zdss_load; - /* pass-through */ - } + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ case zdss_load: - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); - size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); size_t loadedSize; - if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + } ip += loadedSize; zds->inPos += loadedSize; if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, + { size_t const decodedSize = ZSTD_decompressContinue(zds, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, zds->inBuff, neededInSize); if (ZSTD_isError(decodedSize)) return decodedSize; zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { zds->stage = zdss_read; break; } /* this was just a header */ + if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; - /* pass-through */ } } + zds->streamStage = zdss_flush; + /* fall-through */ case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; @@ -2116,39 +1692,74 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB op += flushedSize; zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ - zds->stage = zdss_read; - if (zds->outStart + zds->blockSize > zds->outBuffSize) + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); zds->outStart = zds->outEnd = 0; + } break; - } - /* cannot complete flush */ - someMoreWork = 0; - break; - } - default: return ERROR(GENERIC); /* impossible */ + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC); /* some compiler require default to do something */ } } /* result */ - input->pos += (size_t)(ip-istart); - output->pos += (size_t)(op-ostart); - { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall); + RETURN_ERROR_IF(ip==iend, srcSize_wrong); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); if (!nextSrcSizeHint) { /* frame fully decoded */ if (zds->outEnd == zds->outStart) { /* output fully flushed */ if (zds->hostageByte) { - if (input->pos >= input->size) { zds->stage = zdss_read; return 1; } /* can't release hostage (not present) */ + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } input->pos++; /* release hostage */ - } + } /* zds->hostageByte */ return 0; - } + } /* zds->outEnd == zds->outStart */ if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ zds->hostageByte=1; } return 1; - } - nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ - if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */ - nextSrcSizeHint -= zds->inPos; /* already loaded*/ + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ return nextSrcSizeHint; } } + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} diff --git a/Perl/shared/zstd/decompress/zstd_decompress_block.c b/Perl/shared/zstd/decompress/zstd_decompress_block.c new file mode 100644 index 000000000..a2a7eedcf --- /dev/null +++ b/Perl/shared/zstd/decompress/zstd_decompress_block.c @@ -0,0 +1,1311 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ +#include "compiler.h" /* prefetch */ +#include "cpu.h" /* bmi2 */ +#include "mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. + */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + (istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. + * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +void +ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U16 symbolNext[MaxSeq+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u max, corruption_detected); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + + +/* ZSTD_execSequenceLast7(): + * exceptional case : decompress a match starting within last 7 bytes of output buffer. + * requires more careful checks, to ensure there is no overflow. + * performance does not matter though. + * note : this case is supposed to be never generated "naturally" by reference encoder, + * since in most cases it needs at least 8 bytes to look for a match. + * but it's allowed by the specification. */ +FORCE_NOINLINE +size_t ZSTD_execSequenceLast7(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); + + /* copy literals */ + while (op < oLitEnd) *op++ = *(*litPtr)++; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + while (op < oMatchEnd) *op++ = *match++; + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequenceLong(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = sequence.match; + + /* check */ + RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); + if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + assert(op <= oend_w); + assert(sequence.matchLength >= MINMATCH); + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { /* offset == 0 */ + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + RETURN_ERROR_IF(nbSeq, corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + { size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} diff --git a/Perl/shared/zstd/decompress/zstd_decompress_block.h b/Perl/shared/zstd/decompress/zstd_decompress_block.h new file mode 100644 index 000000000..7e9296041 --- /dev/null +++ b/Perl/shared/zstd/decompress/zstd_decompress_block.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include /* size_t */ +#include "zstd.h" /* DCtx, and some public functions */ +#include "zstd_internal.h" /* blockProperties_t, and some public functions */ +#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog); + + +#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/Perl/shared/zstd/decompress/zstd_decompress_internal.h b/Perl/shared/zstd/decompress/zstd_decompress_internal.h new file mode 100644 index 000000000..ccbdfa090 --- /dev/null +++ b/Perl/shared/zstd/decompress/zstd_decompress_internal.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "mem.h" /* BYTE, U16, U32 */ +#include "zstd_internal.h" /* ZSTD_seqSymbol */ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/Perl/shared/zstd/zstd.h b/Perl/shared/zstd/zstd.h index b02e16b42..53470c18f 100644 --- a/Perl/shared/zstd/zstd.h +++ b/Perl/shared/zstd/zstd.h @@ -2,11 +2,11 @@ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ - #if defined (__cplusplus) extern "C" { #endif @@ -19,192 +19,517 @@ extern "C" { /* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_API __attribute__ ((visibility ("default"))) -#elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else -# define ZSTDLIB_API +# define ZSTDLIB_API ZSTDLIB_VISIBILITY #endif -/******************************************************************************************************* +/******************************************************************************* Introduction - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios - at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and - decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. - Levels >= 20, labelled `--ultra`, should be used with caution, as they require more memory. + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + Compression can be done in: - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit memory management) + - a single step, reusing a context (described as Explicit context) - unbounded multiple steps (described as Streaming compression) - The compression ratio achievable on small data can be highly improved using compression with a dictionary in: + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Fast dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. - Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. - These APIs shall never be used with a dynamic library. - They are not "stable", their definition may change in the future. Only static linking is allowed. -*********************************************************************************************************/ + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 1 -#define ZSTD_VERSION_RELEASE 2 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 0 + +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) +ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE #define ZSTD_QUOTE(str) #str #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) +ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ + +/*************************************** +* Default constant +***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/*************************************** +* Constants +***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). */ + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); /*! ZSTD_decompress() : - `compressedSize` : must be the _exact_ size of a single compressed frame. - `dstCapacity` is an upper bound of originalSize. - If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - or an errorCode if it fails (which can be tested using ZSTD_isError()). */ + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + /*! ZSTD_getDecompressedSize() : -* 'src' is the start of a zstd compressed frame. -* @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. -* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. -* When `return==0`, data to decompress could be any size. -* In which case, it's necessary to use streaming mode to decompress data. -* Optionally, application can still use ZSTD_decompress() while relying on implied limits. -* (For example, data may be necessarily cut into blocks <= 16 KB). -* note 2 : decompressed size is always present when compression is done with ZSTD_compress() -* note 3 : decompressed size can be very large (64-bits value), -* potentially larger than what local system can handle as a single memory segment. -* In which case, it's necessary to use streaming mode to decompress data. -* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. -* Always ensure result fits within application's authorized limits. -* Each application can set its own limits. -* note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */ + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + /*====== Helper functions ======*/ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ /*************************************** -* Explicit memory management +* Explicit context ***************************************/ /*= Compression context -* When compressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* This will make workload friendlier for system's memory. -* Use one context per thread for parallel execution in multi-threaded environments. */ + * When compressing many times, + * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution in multi-threaded environments. */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*! ZSTD_compressCCtx() : - Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); - -/*= Decompression context */ + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx + * The function will compress at requested compression level, + * ignoring any other parameter */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*! ZSTD_decompressDCtx() : -* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : -* Compression using a predefined Dictionary (see dictBuilder/zdict.h). -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -/*! ZSTD_decompress_usingDict() : -* Decompression using a predefined Dictionary (see dictBuilder/zdict.h). -* Dictionary must be identical to the one used during compression. -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); +/*************************************** +* Advanced compression API +***************************************/ -/**************************** -* Fast dictionary API -****************************/ -typedef struct ZSTD_CDict_s ZSTD_CDict; +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx() + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ -/*! ZSTD_createCDict() : -* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. -* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. -* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. -* `dict` can be released after ZSTD_CDict creation. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); -/*! ZSTD_freeCDict() : -* Function frees memory allocated by ZSTD_createCDict(). */ -ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() + * for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level sets all default values of other compression parameters */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such window size at decompression stage if using streaming. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless when using "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless when using "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * They return an error otherwise. */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression work is performed in parallel, within worker threads. + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, +} ZSTD_cParameter; -/*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. -* Note that compression level is decided during dictionary creation. */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -typedef struct ZSTD_DDict_s ZSTD_DDict; +/*************************************** +* Advanced decompression API +***************************************/ -/*! ZSTD_createDDict() : -* Create a digested dictionary, ready to start decompression operation without startup delay. -* `dict` can be released after creation. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ -/*! ZSTD_freeDDict() : -* Function frees memory allocated with ZSTD_createDDict() */ -ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_format + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); -/*! ZSTD_decompress_usingDDict() : -* Decompression using a digested Dictionary. -* Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); /**************************** @@ -231,51 +556,136 @@ typedef struct ZSTD_outBuffer_s { * A ZSTD_CStream object is required to track streaming operation. * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively, -* since it will play nicer with system's memory, by re-using already allocated memory. -* Use one separate ZSTD_CStream per thread for parallel execution. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. * -* Start a new compression by initializing ZSTD_CStream. -* Use ZSTD_initCStream() to start a new compression operation. -* Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section) +* For parallel execution, use one separate ZSTD_CStream per thread. * -* Use ZSTD_compressStream() repetitively to consume input stream. -* The function will automatically update both `pos` fields. -* Note that it may not consume the entire input, in which case `pos < size`, -* and it's up to the caller to present again remaining data. -* @return : a size hint, preferred nb of bytes to use as input for next function call -* or an error code, which can be tested using ZSTD_isError(). -* Note 1 : it's just a hint, to help latency a little, any other value will work fine. -* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). * -* At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). -* `output->pos` will be updated. -* Note that some content might still be left within internal buffer if `output->size` is too small. -* @return : nb of bytes still present within internal buffer (0 if it's empty) +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). * -* ZSTD_endStream() instructs to finish a frame. +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. * It will perform a flush and write frame epilogue. * The epilogue is required for decoders to consider a frame completed. -* Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. -* In which case, call again ZSTD_endStream() to complete the flush. -* @return : nb of bytes still present within internal buffer (0 if it's empty, hence compression completed) +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). * * *******************************************************************/ -typedef struct ZSTD_CStream_s ZSTD_CStream; +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ +/******************************************************************************* + * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and + * ZSTD_compressStream2(). It is redundant, but is still fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/** + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/** + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the number of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); /*-*************************************************************************** @@ -285,89 +695,370 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. * ZSTD_DStream objects can be re-used multiple times. * -* Use ZSTD_initDStream() to start a new decompression operation, -* or ZSTD_initDStream_usingDict() if decompression requires a dictionary. -* @return : recommended first input size +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. * * Use ZSTD_decompressStream() repetitively to consume your input. * The function will update both `pos` fields. * If `input.pos < input.size`, some input has not been consumed. * It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. * If `output.pos < output.size`, decoder has flushed everything it could. -* @return : 0 when a frame is completely decoded and fully flushed, -* an error code, which can be tested using ZSTD_isError(), -* any other value > 0, which means there is still some decoding to do to complete current frame. -* The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. * *******************************************************************************/ -typedef struct ZSTD_DStream_s ZSTD_DStream; +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds); + * ZSTD_DCtx_refDDict(zds, NULL); + */ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. + * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. + * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + #endif /* ZSTD_H_235446 */ +/**************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY /**************************************************************************************** - * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS - * The definitions in this section are considered experimental. - * They should never be used with a dynamic library, as they may change in the future. - * They are provided for advanced usages. - * Use them only in association with static linking. + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) * ***************************************************************************************/ -/* --- Constants ---*/ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U - -#define ZSTD_WINDOWLOG_MAX_32 25 -#define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_HASHLOG3_MAX 17 -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MIN 4 -#define ZSTD_TARGETLENGTH_MAX 999 - -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ -static const size_t ZSTD_frameHeaderSize_prefix = 5; -static const size_t ZSTD_frameHeaderSize_min = 6; -static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ - - -/*--- Advanced types ---*/ -typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy; /* from faster to stronger */ +#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN 6 +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1<= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_sizeofCCtx() : - * Gives the amount of memory used by a given ZSTD_CCtx */ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -/*! ZSTD_createCDict_advanced() : - * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CCtx size estimation is only correct for single-threaded compression. */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. + */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_parameters params, ZSTD_customMem customMem); + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); -/*! ZSTD_sizeof_CDict() : - * Gives the amount of memory used by a given ZSTD_sizeof_CDict */ -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); /*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. -* `estimatedSrcSize` value is optional, select 0 if not known */ + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_getParams() : -* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. -* All fields of `ZSTD_frameParameters` are set to default (0) */ + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_checkCParams() : -* Ensure param values remain within authorized range */ + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); /*! ZSTD_adjustCParams() : -* optimize params for a given `srcSize` and `dictSize`. -* both values are optional, select `0` if unknown. */ + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); /*! ZSTD_compress_advanced() : -* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ -ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); + * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); -/*--- Advanced decompression functions ---*/ +/*************************************** +* Advanced decompression functions +***************************************/ /*! ZSTD_isFrame() : * Tells if the content of `buffer` starts with a valid Frame Identifier. @@ -443,78 +1503,216 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, * Note 3 : Skippable Frame Identifiers are considered valid. */ ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); -/*! ZSTD_estimateDCtxSize() : - * Gives the potential amount of memory allocated to create a ZSTD_DCtx */ -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); - -/*! ZSTD_createDCtx_advanced() : - * Create a ZSTD decompression context using external alloc and free functions */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); - -/*! ZSTD_sizeof_DCtx() : - * Gives the amount of memory used by a given ZSTD_DCtx */ -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); - -/*! ZSTD_sizeof_DDict() : - * Gives the amount of memory used by a given ZSTD_DDict */ -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. + * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ -unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); /******************************************************************** * Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. ********************************************************************/ /*===== Advanced Streaming compression functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */ +/**! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + */ +ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); +/**! ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + */ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); +/**! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. + */ ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */ -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); + ZSTD_parameters params, unsigned long long pledgedSrcSize); +/**! ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); +/**! ZSTD_initCStream_usingCDict_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); /*===== Advanced Streaming decompression functions =====*/ -typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + */ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); +/** + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /********************************************************************* * Buffer-less and synchronous inner streaming functions * * This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with many restrictions (documented below). -* Prefer using normal streaming API for an easier experience +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. ********************************************************************* */ /** @@ -531,8 +1729,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); Then, consume your input using ZSTD_compressContinue(). There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. - - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. Worst case evaluation is provided by ZSTD_compressBound(). ZSTD_compressContinue() doesn't guarantee recover after a failed compression. @@ -542,22 +1740,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); In which case, it will "discard" the relevant memory section from its history. Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame, - Without last block mark, frames will be considered unfinished (broken) by decoders. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. */ /*===== Buffer-less streaming compression functions =====*/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - /*- Buffer-less streaming decompression (synchronous mode) @@ -565,38 +1765,54 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. A ZSTD_DCtx object can be re-used multiple times. - First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams(). - It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame, - such as the minimum rolling buffer size to allocate to decompress data (`windowSize`), - and the dictionary ID used. - (Note : content size is optional, it may not be present. 0 means : content size unknown). - Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information. - As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation. - Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB. - Frame parameters are extracted from the beginning of the compressed frame. - Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes. - @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled. + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. errorCode, which can be tested using ZSTD_isError(). - Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). - Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item. + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. It can also be an error code, which can be tested with ZSTD_isError(). - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`. - They should preferably be located contiguously, prior to current block. - Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference. - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. Context can then be reset to start a new decompression. @@ -606,35 +1822,59 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci == Special case : skippable frames == Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows : + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_decompressContinue() always returns 0. - For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable. - It also returns Frame Size as fparamsPtr->frameContentSize. + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. */ +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; typedef struct { - unsigned long long frameContentSize; - unsigned windowSize; + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; unsigned dictID; unsigned checksumFlag; -} ZSTD_frameParams; +} ZSTD_frameHeader; + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ -/*===== Buffer-less streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -/** - Block functions + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. @@ -643,27 +1883,27 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - Compressing and decompressing require a context structure + Use ZSTD_createCCtx() and ZSTD_createDCtx() - It is necessary to init context before starting - + compression : ZSTD_compressBegin() - + decompression : ZSTD_decompressBegin() - + variants _usingDict() are also allowed - + copyCCtx() and copyDCtx() work too - - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - + If you need to compress more, cut data into multiple blocks - + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large. + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, really consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst`. + In which case, nothing is produced into `dst` ! + User must test for such outcome and deal directly with uncompressed data + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! - + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history. - Use ZSTD_insertBlock() in such a case. + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. */ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ /*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */