forked from netty/netty
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Motivation: LZMA compression algorithm has a very good compression ratio. Modifications: - Added `lzma-java` library which implements LZMA algorithm. - Implemented LzmaFrameEncoder which extends MessageToByteEncoder and provides compression of outgoing messages. - Added tests to verify the LzmaFrameEncoder and how it can compress data for the next uncompression using the original library. Result: LZMA encoder which can compress data using LZMA algorithm.
- Loading branch information
1 parent
08cec3c
commit cf5aea5
Showing
7 changed files
with
580 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
215 changes: 215 additions & 0 deletions
215
codec/src/main/java/io/netty/handler/codec/compression/LzmaFrameEncoder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
/* | ||
* Copyright 2014 The Netty Project | ||
* | ||
* The Netty Project licenses this file to you under the Apache License, | ||
* version 2.0 (the "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at: | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
* License for the specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package io.netty.handler.codec.compression; | ||
|
||
import io.netty.buffer.ByteBuf; | ||
import io.netty.buffer.ByteBufInputStream; | ||
import io.netty.buffer.ByteBufOutputStream; | ||
import io.netty.channel.ChannelHandlerContext; | ||
import io.netty.handler.codec.MessageToByteEncoder; | ||
import io.netty.util.internal.logging.InternalLogger; | ||
import io.netty.util.internal.logging.InternalLoggerFactory; | ||
import lzma.sdk.lzma.Base; | ||
import lzma.sdk.lzma.Encoder; | ||
|
||
import java.io.InputStream; | ||
|
||
import static lzma.sdk.lzma.Encoder.*; | ||
|
||
/** | ||
* Compresses a {@link ByteBuf} using the LZMA algorithm. | ||
* | ||
* See <a href="http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm">LZMA</a> | ||
* and <a href="http://svn.python.org/projects/external/xz-5.0.5/doc/lzma-file-format.txt">LZMA format</a> | ||
* or documents in <a href="http://www.7-zip.org/sdk.html">LZMA SDK</a> archive. | ||
*/ | ||
public class LzmaFrameEncoder extends MessageToByteEncoder<ByteBuf> { | ||
|
||
private static final InternalLogger logger = InternalLoggerFactory.getInstance(LzmaFrameEncoder.class); | ||
|
||
private static final int MEDIUM_DICTIONARY_SIZE = 1 << 16; | ||
|
||
private static final int MIN_FAST_BYTES = 5; | ||
private static final int MEDIUM_FAST_BYTES = 0x20; | ||
private static final int MAX_FAST_BYTES = Base.kMatchMaxLen; | ||
|
||
private static final int DEFAULT_MATCH_FINDER = EMatchFinderTypeBT4; | ||
|
||
private static final int DEFAULT_LC = 3; | ||
private static final int DEFAULT_LP = 0; | ||
private static final int DEFAULT_PB = 2; | ||
|
||
/** | ||
* Underlying LZMA encoder in use. | ||
*/ | ||
private final Encoder encoder; | ||
|
||
/** | ||
* The Properties field contains three properties which are encoded using the following formula: | ||
* | ||
* <p>{@code Properties = (pb * 5 + lp) * 9 + lc}</p> | ||
* | ||
* The field consists of | ||
* <ol> | ||
* <li>the number of literal context bits (lc, [0, 8]);</li> | ||
* <li>the number of literal position bits (lp, [0, 4]);</li> | ||
* <li>the number of position bits (pb, [0, 4]).</li> | ||
* </ol> | ||
*/ | ||
private final byte properties; | ||
|
||
/** | ||
* Dictionary Size is stored as an unsigned 32-bit little endian integer. | ||
*/ | ||
private final int littleEndianDictionarySize; | ||
|
||
/** | ||
* For log warning only once. | ||
*/ | ||
private static boolean warningLogged; | ||
|
||
/** | ||
* Creates LZMA encoder with default settings. | ||
*/ | ||
public LzmaFrameEncoder() { | ||
this(MEDIUM_DICTIONARY_SIZE); | ||
} | ||
|
||
/** | ||
* Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb} | ||
* values and the medium dictionary size of {@value #MEDIUM_DICTIONARY_SIZE}. | ||
*/ | ||
public LzmaFrameEncoder(int lc, int lp, int pb) { | ||
this(lc, lp, pb, MEDIUM_DICTIONARY_SIZE); | ||
} | ||
|
||
/** | ||
* Creates LZMA encoder with specified dictionary size and default values of | ||
* {@code lc} = {@value #DEFAULT_LC}, | ||
* {@code lp} = {@value #DEFAULT_LP}, | ||
* {@code pb} = {@value #DEFAULT_PB}. | ||
*/ | ||
public LzmaFrameEncoder(int dictionarySize) { | ||
this(DEFAULT_LC, DEFAULT_LP, DEFAULT_PB, dictionarySize); | ||
} | ||
|
||
/** | ||
* Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb} values and custom dictionary size. | ||
*/ | ||
public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize) { | ||
this(lc, lp, pb, dictionarySize, false, MEDIUM_FAST_BYTES); | ||
} | ||
|
||
/** | ||
* Creates LZMA encoder with specified settings. | ||
* | ||
* @param lc | ||
* the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}. | ||
* @param lp | ||
* the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}. | ||
* @param pb | ||
* the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}. | ||
* @param dictionarySize | ||
* available values [0, {@link java.lang.Integer#MAX_VALUE}], | ||
* default value is {@value #MEDIUM_DICTIONARY_SIZE}. | ||
* @param endMarkerMode | ||
* indicates should {@link LzmaFrameEncoder} use end of stream marker or not. | ||
* Note, that {@link LzmaFrameEncoder} always sets size of uncompressed data | ||
* in LZMA header, so EOS marker is unnecessary. But you may use it for | ||
* better portability. For full description see "LZMA Decoding modes" section | ||
* of LZMA-Specification.txt in official LZMA SDK. | ||
* @param numFastBytes | ||
* available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}]. | ||
*/ | ||
public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize, boolean endMarkerMode, int numFastBytes) { | ||
if (lc < 0 || lc > 8) { | ||
throw new IllegalArgumentException("lc: " + lc + " (expected: 0-8)"); | ||
} | ||
if (lp < 0 || lp > 4) { | ||
throw new IllegalArgumentException("lp: " + lp + " (expected: 0-4)"); | ||
} | ||
if (pb < 0 || pb > 4) { | ||
throw new IllegalArgumentException("pb: " + pb + " (expected: 0-4)"); | ||
} | ||
if (lc + pb > 4) { | ||
if (!warningLogged) { | ||
logger.warn("The latest versions of LZMA libraries (for example, XZ Utils) " + | ||
"has an additional requirement: lc + lp <= 4. Data which don't follow " + | ||
"this requirement cannot be decompressed with this libraries."); | ||
warningLogged = true; | ||
} | ||
} | ||
if (dictionarySize < 0) { | ||
throw new IllegalArgumentException("dictionarySize: " + dictionarySize + " (expected: 0+)"); | ||
} | ||
if (numFastBytes < MIN_FAST_BYTES || numFastBytes > MAX_FAST_BYTES) { | ||
throw new IllegalArgumentException(String.format( | ||
"numFastBytes: %d (expected: %d-%d)", numFastBytes, MIN_FAST_BYTES, MAX_FAST_BYTES | ||
)); | ||
} | ||
|
||
encoder = new Encoder(); | ||
encoder.setDictionarySize(dictionarySize); | ||
encoder.setEndMarkerMode(endMarkerMode); | ||
encoder.setMatchFinder(DEFAULT_MATCH_FINDER); | ||
encoder.setNumFastBytes(numFastBytes); | ||
encoder.setLcLpPb(lc, lp, pb); | ||
|
||
properties = (byte) ((pb * 5 + lp) * 9 + lc); | ||
littleEndianDictionarySize = Integer.reverseBytes(dictionarySize); | ||
} | ||
|
||
@Override | ||
protected void encode(ChannelHandlerContext ctx, ByteBuf in, ByteBuf out) throws Exception { | ||
final int length = in.readableBytes(); | ||
final InputStream bbIn = new ByteBufInputStream(in); | ||
|
||
final ByteBufOutputStream bbOut = new ByteBufOutputStream(out); | ||
bbOut.writeByte(properties); | ||
bbOut.writeInt(littleEndianDictionarySize); | ||
bbOut.writeLong(Long.reverseBytes(length)); | ||
encoder.code(bbIn, bbOut, -1, -1, null); | ||
|
||
bbIn.close(); | ||
bbOut.close(); | ||
} | ||
|
||
@Override | ||
protected ByteBuf allocateBuffer(ChannelHandlerContext ctx, ByteBuf in, boolean preferDirect) throws Exception { | ||
final int length = in.readableBytes(); | ||
final int maxOutputLength = maxOutputBufferLength(length); | ||
return ctx.alloc().ioBuffer(maxOutputLength); | ||
} | ||
|
||
/** | ||
* Calculates maximum possible size of output buffer for not compressible data. | ||
*/ | ||
private static int maxOutputBufferLength(int inputLength) { | ||
double factor; | ||
if (inputLength < 200) { | ||
factor = 1.5; | ||
} else if (inputLength < 500) { | ||
factor = 1.2; | ||
} else if (inputLength < 1000) { | ||
factor = 1.1; | ||
} else if (inputLength < 10000) { | ||
factor = 1.05; | ||
} else { | ||
factor = 1.02; | ||
} | ||
return 13 + (int) (inputLength * factor); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
145 changes: 145 additions & 0 deletions
145
codec/src/test/java/io/netty/handler/codec/compression/LzmaFrameEncoderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
/* | ||
* Copyright 2014 The Netty Project | ||
* | ||
* The Netty Project licenses this file to you under the Apache License, | ||
* version 2.0 (the "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at: | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
* License for the specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package io.netty.handler.codec.compression; | ||
|
||
import io.netty.buffer.ByteBuf; | ||
import io.netty.buffer.ByteBufInputStream; | ||
import io.netty.buffer.CompositeByteBuf; | ||
import io.netty.buffer.Unpooled; | ||
import io.netty.channel.embedded.EmbeddedChannel; | ||
import io.netty.util.internal.ThreadLocalRandom; | ||
import lzma.sdk.lzma.Decoder; | ||
import lzma.streams.LzmaInputStream; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import java.io.InputStream; | ||
|
||
import static org.junit.Assert.*; | ||
|
||
public class LzmaFrameEncoderTest { | ||
|
||
private static final ThreadLocalRandom rand; | ||
|
||
private static final byte[] BYTES_SMALL = new byte[256]; | ||
private static final byte[] BYTES_LARGE = new byte[256000]; | ||
|
||
static { | ||
rand = ThreadLocalRandom.current(); | ||
rand.nextBytes(BYTES_SMALL); | ||
rand.nextBytes(BYTES_LARGE); | ||
} | ||
|
||
private EmbeddedChannel channel; | ||
|
||
@Before | ||
public void initChannel() { | ||
channel = new EmbeddedChannel(new LzmaFrameEncoder()); | ||
} | ||
|
||
private static void testCompression(final EmbeddedChannel channel, final byte[] data) throws Exception { | ||
ByteBuf in = Unpooled.wrappedBuffer(data); | ||
assertTrue(channel.writeOutbound(in)); | ||
assertTrue(channel.finish()); | ||
|
||
byte[] uncompressed = uncompress(channel, data.length); | ||
|
||
assertArrayEquals(data, uncompressed); | ||
} | ||
|
||
@Test | ||
public void testCompressionOfSmallChunkOfData() throws Exception { | ||
testCompression(channel, BYTES_SMALL); | ||
} | ||
|
||
@Test | ||
public void testCompressionOfLargeChunkOfData() throws Exception { | ||
testCompression(channel, BYTES_LARGE); | ||
} | ||
|
||
@Test | ||
public void testCompressionOfBatchedFlowOfData() throws Exception { | ||
final byte[] data = BYTES_SMALL; | ||
|
||
int written = 0, length = rand.nextInt(50); | ||
while (written + length < data.length) { | ||
ByteBuf in = Unpooled.wrappedBuffer(data, written, length); | ||
assertTrue(channel.writeOutbound(in)); | ||
written += length; | ||
length = rand.nextInt(50); | ||
} | ||
ByteBuf in = Unpooled.wrappedBuffer(data, written, data.length - written); | ||
assertTrue(channel.writeOutbound(in)); | ||
assertTrue(channel.finish()); | ||
|
||
byte[] uncompressed = new byte[data.length]; | ||
int outOffset = 0; | ||
|
||
ByteBuf msg; | ||
while ((msg = channel.readOutbound()) != null) { | ||
InputStream is = new ByteBufInputStream(msg); | ||
LzmaInputStream lzmaIs = new LzmaInputStream(is, new Decoder()); | ||
for (;;) { | ||
int read = lzmaIs.read(uncompressed, outOffset, data.length - outOffset); | ||
if (read > 0) { | ||
outOffset += read; | ||
} else { | ||
break; | ||
} | ||
} | ||
assertEquals(0, is.available()); | ||
assertEquals(-1, is.read()); | ||
|
||
is.close(); | ||
lzmaIs.close(); | ||
msg.release(); | ||
} | ||
|
||
assertArrayEquals(data, uncompressed); | ||
} | ||
|
||
private static byte[] uncompress(EmbeddedChannel channel, int length) throws Exception { | ||
CompositeByteBuf out = Unpooled.compositeBuffer(); | ||
ByteBuf msg; | ||
while ((msg = channel.readOutbound()) != null) { | ||
out.addComponent(msg); | ||
out.writerIndex(out.writerIndex() + msg.readableBytes()); | ||
} | ||
|
||
InputStream is = new ByteBufInputStream(out); | ||
LzmaInputStream lzmaIs = new LzmaInputStream(is, new Decoder()); | ||
byte[] uncompressed = new byte[length]; | ||
int remaining = length; | ||
while (remaining > 0) { | ||
int read = lzmaIs.read(uncompressed, length - remaining, remaining); | ||
if (read > 0) { | ||
remaining -= read; | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
assertEquals(0, is.available()); | ||
assertEquals(-1, is.read()); | ||
assertEquals(-1, lzmaIs.read()); | ||
|
||
is.close(); | ||
lzmaIs.close(); | ||
out.release(); | ||
|
||
return uncompressed; | ||
} | ||
} |
Oops, something went wrong.