Skip to content

Commit

Permalink
Implemented LZMA frame encoder
Browse files Browse the repository at this point in the history
Motivation:

LZMA compression algorithm has a very good compression ratio.

Modifications:

- Added `lzma-java` library which implements LZMA algorithm.
- Implemented LzmaFrameEncoder which extends MessageToByteEncoder and provides compression of outgoing messages.
- Added tests to verify the LzmaFrameEncoder and how it can compress data for the next uncompression using the original library.

Result:

LZMA encoder which can compress data using LZMA algorithm.
  • Loading branch information
idelpivnitskiy authored and Norman Maurer committed Sep 15, 2014
1 parent 08cec3c commit cf5aea5
Show file tree
Hide file tree
Showing 7 changed files with 580 additions and 1 deletion.
8 changes: 8 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ and decompression library written by Adrien Grand. It can be obtained at:
* HOMEPAGE:
* https://github.com/jpountz/lz4-java

This product optionally depends on 'lzma-java', a LZMA Java compression
and decompression library, which can be obtained at:

* LICENSE:
* license/LICENSE.lzma-java.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/jponge/lzma-java

This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression
and decompression library written by William Kinney. It can be obtained at:

Expand Down
5 changes: 5 additions & 0 deletions codec/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@
<artifactId>lz4</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.github.jponge</groupId>
<artifactId>lzma-java</artifactId>
<optional>true</optional>
</dependency>

<!-- Test dependencies for jboss marshalling encoder/decoder -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.compression;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.ByteBufOutputStream;
import io.netty.channel.ChannelHandlerContext;
import io.netty.handler.codec.MessageToByteEncoder;
import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;
import lzma.sdk.lzma.Base;
import lzma.sdk.lzma.Encoder;

import java.io.InputStream;

import static lzma.sdk.lzma.Encoder.*;

/**
* Compresses a {@link ByteBuf} using the LZMA algorithm.
*
* See <a href="http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm">LZMA</a>
* and <a href="http://svn.python.org/projects/external/xz-5.0.5/doc/lzma-file-format.txt">LZMA format</a>
* or documents in <a href="http://www.7-zip.org/sdk.html">LZMA SDK</a> archive.
*/
public class LzmaFrameEncoder extends MessageToByteEncoder<ByteBuf> {

private static final InternalLogger logger = InternalLoggerFactory.getInstance(LzmaFrameEncoder.class);

private static final int MEDIUM_DICTIONARY_SIZE = 1 << 16;

private static final int MIN_FAST_BYTES = 5;
private static final int MEDIUM_FAST_BYTES = 0x20;
private static final int MAX_FAST_BYTES = Base.kMatchMaxLen;

private static final int DEFAULT_MATCH_FINDER = EMatchFinderTypeBT4;

private static final int DEFAULT_LC = 3;
private static final int DEFAULT_LP = 0;
private static final int DEFAULT_PB = 2;

/**
* Underlying LZMA encoder in use.
*/
private final Encoder encoder;

/**
* The Properties field contains three properties which are encoded using the following formula:
*
* <p>{@code Properties = (pb * 5 + lp) * 9 + lc}</p>
*
* The field consists of
* <ol>
* <li>the number of literal context bits (lc, [0, 8]);</li>
* <li>the number of literal position bits (lp, [0, 4]);</li>
* <li>the number of position bits (pb, [0, 4]).</li>
* </ol>
*/
private final byte properties;

/**
* Dictionary Size is stored as an unsigned 32-bit little endian integer.
*/
private final int littleEndianDictionarySize;

/**
* For log warning only once.
*/
private static boolean warningLogged;

/**
* Creates LZMA encoder with default settings.
*/
public LzmaFrameEncoder() {
this(MEDIUM_DICTIONARY_SIZE);
}

/**
* Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb}
* values and the medium dictionary size of {@value #MEDIUM_DICTIONARY_SIZE}.
*/
public LzmaFrameEncoder(int lc, int lp, int pb) {
this(lc, lp, pb, MEDIUM_DICTIONARY_SIZE);
}

/**
* Creates LZMA encoder with specified dictionary size and default values of
* {@code lc} = {@value #DEFAULT_LC},
* {@code lp} = {@value #DEFAULT_LP},
* {@code pb} = {@value #DEFAULT_PB}.
*/
public LzmaFrameEncoder(int dictionarySize) {
this(DEFAULT_LC, DEFAULT_LP, DEFAULT_PB, dictionarySize);
}

/**
* Creates LZMA encoder with specified {@code lc}, {@code lp}, {@code pb} values and custom dictionary size.
*/
public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize) {
this(lc, lp, pb, dictionarySize, false, MEDIUM_FAST_BYTES);
}

/**
* Creates LZMA encoder with specified settings.
*
* @param lc
* the number of "literal context" bits, available values [0, 8], default value {@value #DEFAULT_LC}.
* @param lp
* the number of "literal position" bits, available values [0, 4], default value {@value #DEFAULT_LP}.
* @param pb
* the number of "position" bits, available values [0, 4], default value {@value #DEFAULT_PB}.
* @param dictionarySize
* available values [0, {@link java.lang.Integer#MAX_VALUE}],
* default value is {@value #MEDIUM_DICTIONARY_SIZE}.
* @param endMarkerMode
* indicates should {@link LzmaFrameEncoder} use end of stream marker or not.
* Note, that {@link LzmaFrameEncoder} always sets size of uncompressed data
* in LZMA header, so EOS marker is unnecessary. But you may use it for
* better portability. For full description see "LZMA Decoding modes" section
* of LZMA-Specification.txt in official LZMA SDK.
* @param numFastBytes
* available values [{@value #MIN_FAST_BYTES}, {@value #MAX_FAST_BYTES}].
*/
public LzmaFrameEncoder(int lc, int lp, int pb, int dictionarySize, boolean endMarkerMode, int numFastBytes) {
if (lc < 0 || lc > 8) {
throw new IllegalArgumentException("lc: " + lc + " (expected: 0-8)");
}
if (lp < 0 || lp > 4) {
throw new IllegalArgumentException("lp: " + lp + " (expected: 0-4)");
}
if (pb < 0 || pb > 4) {
throw new IllegalArgumentException("pb: " + pb + " (expected: 0-4)");
}
if (lc + pb > 4) {
if (!warningLogged) {
logger.warn("The latest versions of LZMA libraries (for example, XZ Utils) " +
"has an additional requirement: lc + lp <= 4. Data which don't follow " +
"this requirement cannot be decompressed with this libraries.");
warningLogged = true;
}
}
if (dictionarySize < 0) {
throw new IllegalArgumentException("dictionarySize: " + dictionarySize + " (expected: 0+)");
}
if (numFastBytes < MIN_FAST_BYTES || numFastBytes > MAX_FAST_BYTES) {
throw new IllegalArgumentException(String.format(
"numFastBytes: %d (expected: %d-%d)", numFastBytes, MIN_FAST_BYTES, MAX_FAST_BYTES
));
}

encoder = new Encoder();
encoder.setDictionarySize(dictionarySize);
encoder.setEndMarkerMode(endMarkerMode);
encoder.setMatchFinder(DEFAULT_MATCH_FINDER);
encoder.setNumFastBytes(numFastBytes);
encoder.setLcLpPb(lc, lp, pb);

properties = (byte) ((pb * 5 + lp) * 9 + lc);
littleEndianDictionarySize = Integer.reverseBytes(dictionarySize);
}

@Override
protected void encode(ChannelHandlerContext ctx, ByteBuf in, ByteBuf out) throws Exception {
final int length = in.readableBytes();
final InputStream bbIn = new ByteBufInputStream(in);

final ByteBufOutputStream bbOut = new ByteBufOutputStream(out);
bbOut.writeByte(properties);
bbOut.writeInt(littleEndianDictionarySize);
bbOut.writeLong(Long.reverseBytes(length));
encoder.code(bbIn, bbOut, -1, -1, null);

bbIn.close();
bbOut.close();
}

@Override
protected ByteBuf allocateBuffer(ChannelHandlerContext ctx, ByteBuf in, boolean preferDirect) throws Exception {
final int length = in.readableBytes();
final int maxOutputLength = maxOutputBufferLength(length);
return ctx.alloc().ioBuffer(maxOutputLength);
}

/**
* Calculates maximum possible size of output buffer for not compressible data.
*/
private static int maxOutputBufferLength(int inputLength) {
double factor;
if (inputLength < 200) {
factor = 1.5;
} else if (inputLength < 500) {
factor = 1.2;
} else if (inputLength < 1000) {
factor = 1.1;
} else if (inputLength < 10000) {
factor = 1.05;
} else {
factor = 1.02;
}
return 13 + (int) (inputLength * factor);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,3 @@
* <a href="http://code.google.com/p/snappy/">Snappy</a>.
*/
package io.netty.handler.codec.compression;
// TODO Implement lzma handler
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.compression;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.CompositeByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.embedded.EmbeddedChannel;
import io.netty.util.internal.ThreadLocalRandom;
import lzma.sdk.lzma.Decoder;
import lzma.streams.LzmaInputStream;
import org.junit.Before;
import org.junit.Test;

import java.io.InputStream;

import static org.junit.Assert.*;

public class LzmaFrameEncoderTest {

private static final ThreadLocalRandom rand;

private static final byte[] BYTES_SMALL = new byte[256];
private static final byte[] BYTES_LARGE = new byte[256000];

static {
rand = ThreadLocalRandom.current();
rand.nextBytes(BYTES_SMALL);
rand.nextBytes(BYTES_LARGE);
}

private EmbeddedChannel channel;

@Before
public void initChannel() {
channel = new EmbeddedChannel(new LzmaFrameEncoder());
}

private static void testCompression(final EmbeddedChannel channel, final byte[] data) throws Exception {
ByteBuf in = Unpooled.wrappedBuffer(data);
assertTrue(channel.writeOutbound(in));
assertTrue(channel.finish());

byte[] uncompressed = uncompress(channel, data.length);

assertArrayEquals(data, uncompressed);
}

@Test
public void testCompressionOfSmallChunkOfData() throws Exception {
testCompression(channel, BYTES_SMALL);
}

@Test
public void testCompressionOfLargeChunkOfData() throws Exception {
testCompression(channel, BYTES_LARGE);
}

@Test
public void testCompressionOfBatchedFlowOfData() throws Exception {
final byte[] data = BYTES_SMALL;

int written = 0, length = rand.nextInt(50);
while (written + length < data.length) {
ByteBuf in = Unpooled.wrappedBuffer(data, written, length);
assertTrue(channel.writeOutbound(in));
written += length;
length = rand.nextInt(50);
}
ByteBuf in = Unpooled.wrappedBuffer(data, written, data.length - written);
assertTrue(channel.writeOutbound(in));
assertTrue(channel.finish());

byte[] uncompressed = new byte[data.length];
int outOffset = 0;

ByteBuf msg;
while ((msg = channel.readOutbound()) != null) {
InputStream is = new ByteBufInputStream(msg);
LzmaInputStream lzmaIs = new LzmaInputStream(is, new Decoder());
for (;;) {
int read = lzmaIs.read(uncompressed, outOffset, data.length - outOffset);
if (read > 0) {
outOffset += read;
} else {
break;
}
}
assertEquals(0, is.available());
assertEquals(-1, is.read());

is.close();
lzmaIs.close();
msg.release();
}

assertArrayEquals(data, uncompressed);
}

private static byte[] uncompress(EmbeddedChannel channel, int length) throws Exception {
CompositeByteBuf out = Unpooled.compositeBuffer();
ByteBuf msg;
while ((msg = channel.readOutbound()) != null) {
out.addComponent(msg);
out.writerIndex(out.writerIndex() + msg.readableBytes());
}

InputStream is = new ByteBufInputStream(out);
LzmaInputStream lzmaIs = new LzmaInputStream(is, new Decoder());
byte[] uncompressed = new byte[length];
int remaining = length;
while (remaining > 0) {
int read = lzmaIs.read(uncompressed, length - remaining, remaining);
if (read > 0) {
remaining -= read;
} else {
break;
}
}

assertEquals(0, is.available());
assertEquals(-1, is.read());
assertEquals(-1, lzmaIs.read());

is.close();
lzmaIs.close();
out.release();

return uncompressed;
}
}
Loading

0 comments on commit cf5aea5

Please sign in to comment.