Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #1271 added LZWDecode compression #1286

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ in order to get warned about deprecated features used in your code.
This can also be enabled programmatically with `warnings.simplefilter('default', DeprecationWarning)`.

## [2.8.2] - Not released yet
### Added
* support for `LZWDecode` compression of embedded images - [issue #1271](https://github.com/py-pdf/fpdf2/issues/1271)
### Fixed
* `FPDF.set_text_shaping(False)` was broken since version 2.7.8 and is now working properly - [issue #1287](https://github.com/py-pdf/fpdf2/issues/1287)

Expand Down
127 changes: 126 additions & 1 deletion fpdf/image_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ImageSettings:


LOGGER = logging.getLogger(__name__)
SUPPORTED_IMAGE_FILTERS = ("AUTO", "FlateDecode", "DCTDecode", "JPXDecode")
SUPPORTED_IMAGE_FILTERS = ("AUTO", "FlateDecode", "DCTDecode", "JPXDecode", "LZWDecode")
SETTINGS = ImageSettings()

# fmt: off
Expand Down Expand Up @@ -66,6 +66,11 @@ class ImageSettings:
]
# fmt: on

# Parameters shared by the LZW encoder helpers that implement the "LZWDecode"
# image filter. Codes 0-255 stand for the literal byte values, so the two
# special markers below take the next two codes and the first code available
# for a multi-byte sequence is LZW_EOD_MARKER + 1.
LZW_CLEAR_TABLE_MARKER = 256  # Special code to indicate table reset
LZW_EOD_MARKER = 257  # End-of-data marker
LZW_INITIAL_BITS_PER_CODE = 9  # Initial code bit width
LZW_MAX_BITS_PER_CODE = 12  # Maximum code bit width


def preload_image(image_cache: ImageCache, name, dims=None):
"""
Expand Down Expand Up @@ -256,6 +261,11 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
img = img.convert("RGBA")
img_altered = True

if img.mode in ("P", "RGBA") and image_filter == "LZWDecode":
img = img.convert("RGB")
elif img.mode in ("LA") and image_filter == "LZWDecode":
img = img.convert("L")

w, h = img.size
info = RasterImageInfo()

Expand Down Expand Up @@ -527,13 +537,128 @@ def __getitem__(self, tag):
return newimgio.read(length)


def _to_lzwdata(img, remove_slice=None, select_slice=None):
    """
    Serialize the pixels of `img` and compress them with the LZW algorithm.

    Args:
        img: image object exposing `tobytes()`, `mode` and `size`
            (width, height).
        remove_slice (slice): optional slice of the raw pixel bytes to delete
            before encoding.
        select_slice (slice): optional slice of the raw pixel bytes to keep
            before encoding (applied after `remove_slice`).

    Returns:
        bytes: the packed code stream, which starts with a clear-table code
        and ends with an end-of-data code.
    """
    data = bytearray(img.tobytes())

    if remove_slice:
        del data[remove_slice]
    if select_slice:
        data = data[select_slice]

    if img.mode == "1":
        # 1-bit images: 8 pixels per byte, rows rounded up to a whole byte
        row_size = ceil(img.size[0] / 8)
    else:
        channels_count = len(data) // (img.size[0] * img.size[1])
        row_size = img.size[0] * channels_count

    # Every row is prefixed with a single zero byte.
    # NOTE(review): presumably the per-row filter-type byte expected by a PNG
    # predictor - confirm against the /DecodeParms emitted for this filter.
    data_with_padding = bytearray()
    for i in range(0, len(data), row_size):
        data_with_padding.extend(b"\0")
        data_with_padding.extend(data[i : i + row_size])
    data = data_with_padding

    # Start compression.
    # Only the code values are produced here: the bit width of each code is
    # derived later by pack_codes_into_bytes(), so it is not tracked in this loop.
    highest_code = (1 << LZW_MAX_BITS_PER_CODE) - 1

    # The encoder shall begin by issuing a clear-table code:
    result_codes = [LZW_CLEAR_TABLE_MARKER]
    table, next_code, _, _ = clear_table()

    current_sequence = b""
    for byte in data:
        symbol = bytes([byte])
        next_sequence = current_sequence + symbol

        if next_sequence in table:
            # Extend current sequence if already in the table
            current_sequence = next_sequence
            continue

        # Output code for the current sequence
        result_codes.append(table[current_sequence])

        if next_code <= highest_code:
            # Add the new sequence to the table while there's room
            table[next_sequence] = next_code
            next_code += 1
        else:
            # If the table is full, emit a clear-table command
            result_codes.append(LZW_CLEAR_TABLE_MARKER)
            table, next_code, _, _ = clear_table()

        # Start new sequence
        current_sequence = symbol

    # Ensure everything actually is encoded
    if current_sequence:
        result_codes.append(table[current_sequence])

    result_codes.append(LZW_EOD_MARKER)

    return pack_codes_into_bytes(result_codes)


def pack_codes_into_bytes(codes):
    """
    Convert the list of result codes into a continuous byte stream, with codes
    packed most-significant-bit first as per the current code bit-width.

    The bit-width starts at LZW_INITIAL_BITS_PER_CODE and grows by one bit
    (up to LZW_MAX_BITS_PER_CODE) each time the running count of assigned codes
    exceeds the largest value representable with the current width.
    A clear-table code resets the width to its initial value.

    Args:
        codes (iterable of int): LZW codes, including the clear-table and
            end-of-data markers.

    Returns:
        bytes: the packed stream; the final partial byte, if any, is padded
        with zero bits on the right.
    """
    _, next_code, bits_per_code, max_code_value = clear_table()
    buffer = 0
    bits_in_buffer = 0
    output = bytearray()

    for code in codes:
        buffer = (buffer << bits_per_code) | code
        bits_in_buffer += bits_per_code

        # Flush every complete byte, most significant bits first
        while bits_in_buffer >= 8:
            bits_in_buffer -= 8
            output.append((buffer >> bits_in_buffer) & 0xFF)

        # Keep only the bits not yet written: without this mask the integer
        # would accumulate the whole stream and each shift would get slower.
        buffer &= (1 << bits_in_buffer) - 1

        if code == LZW_CLEAR_TABLE_MARKER:
            _, next_code, bits_per_code, max_code_value = clear_table()
        elif code != LZW_EOD_MARKER:
            # Mirror the table growth of the encoder to know the width
            # of the next code
            next_code += 1
            if next_code > max_code_value and bits_per_code < LZW_MAX_BITS_PER_CODE:
                bits_per_code += 1
                max_code_value = (1 << bits_per_code) - 1

    if bits_in_buffer > 0:
        # Left-align the remaining bits in a final byte
        output.append((buffer << (8 - bits_in_buffer)) & 0xFF)

    return bytes(output)


def clear_table():
    """
    Build a fresh encoder state.

    Returns:
        tuple: `(table, next_code, bits_per_code, max_code_value)` where
        `table` maps each single-byte sequence to its own value,
        `next_code` is the first code following the end-of-data marker,
        `bits_per_code` is the initial code width and `max_code_value`
        is the largest value that fits in that width.
    """
    initial_width = LZW_INITIAL_BITS_PER_CODE
    single_byte_codes = {}
    for value in range(256):
        single_byte_codes[bytes((value,))] = value
    return (
        single_byte_codes,
        LZW_EOD_MARKER + 1,
        initial_width,
        (1 << initial_width) - 1,
    )


def _to_data(img, image_filter, **kwargs):
if image_filter == "FlateDecode":
return _to_zdata(img, **kwargs)

if image_filter == "CCITTFaxDecode":
return transcode_monochrome(img)

if image_filter == "LZWDecode":
return _to_lzwdata(img, **kwargs)

if img.mode == "LA":
img = img.convert("L")

Expand Down
1 change: 1 addition & 0 deletions scripts/verapdf-ignore.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"6.7.3-7": "REASON: setting XML/XMP metadata is entirely optional with fpdf2",
"6.7.11-1": "REASON: up to fpdf2 v2.3.2, test_xmp_metadata included the PDF/A version and conformance level of the file, but then it started to break PDF Checker does-not-conform-to-claimed-pdfa-type rule. PENDING proper support for PDF/A",
"6.9-2": "REASON: false positive on test/signing/sign_pkcs12.pdf",
"6.1.10-1": "REASON: fpdf2 wants to support LZWDecode filter",
"6.1.11-1": "REASON: /EF is allowed in order for fpdf2 to be able to embed files",
"6.1.11-2": "REASON: /EmbeddedFiles is allowed in order for fpdf2 to be able to embed files"
}
Expand Down
Binary file not shown.
Binary file not shown.
14 changes: 14 additions & 0 deletions test/image/image_types/test_insert_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,20 @@ def test_insert_jpg_flatedecode(tmp_path):
assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_flatedecode.pdf", tmp_path)


def test_insert_jpg_lzwdecode(tmp_path):
    """Insert a JPEG with the LZWDecode image filter and compare with the reference PDF."""
    # A dedicated reference file is used when running on Windows / Cygwin
    on_windows = sys.platform in ("cygwin", "win32")
    expected_pdf = HERE / (
        "image_types_insert_jpg_lzwdecode_windows.pdf"
        if on_windows
        else "image_types_insert_jpg_lzwdecode.pdf"
    )

    doc = fpdf.FPDF()
    doc.compress = False
    doc.set_image_filter("LZWDecode")
    doc.add_page()
    doc.image(HERE / "insert_images_insert_jpg.jpg", x=15, y=15, h=140)

    assert_pdf_equal(doc, expected_pdf, tmp_path)


def test_insert_jpg_cmyk(tmp_path):
pdf = fpdf.FPDF()
pdf.compress = False
Expand Down
Binary file modified test/image/png_indexed/image_png_indexed_no_transparency.pdf
Binary file not shown.
Binary file modified test/image/png_indexed/image_png_indexed_transparency.pdf
Binary file not shown.
17 changes: 17 additions & 0 deletions test/image/png_indexed/test_png_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ def test_png_indexed_no_transparency(tmp_path):
pdf.set_image_filter("DCTDecode")
pdf.image(HERE / "flower1.png", x=150, y=10, w=50, h=50)

pdf.set_image_filter("LZWDecode")
pdf.image(HERE / "flower1.png", x=10, y=150, w=50, h=50)

# PA images
img = Image.open(HERE / "flower1.png").convert("PA")
assert img.mode == "PA", "img.mode is not PA"
Expand All @@ -37,6 +40,9 @@ def test_png_indexed_no_transparency(tmp_path):
pdf.set_image_filter("JPXDecode")
pdf.image(img, x=150, y=80, w=50, h=50)

pdf.set_image_filter("LZWDecode")
pdf.image(img, x=10, y=220, w=50, h=50)

assert_pdf_equal(pdf, HERE / "image_png_indexed_no_transparency.pdf", tmp_path)


Expand Down Expand Up @@ -100,4 +106,15 @@ def insert_alpha_channel_from_RGBA(img, path_png):
pdf.set_image_filter("JPXDecode")
pdf.image(img, x=150, y=210, w=50, h=90)

pdf.add_page()

pdf.set_image_filter("LZWDecode")
pdf.image(HERE / "flower2.png", x=10, y=10, w=50, h=90)

pdf.set_image_filter("LZWDecode")
pdf.image(HERE / "flower3.png", x=10, y=110, w=50, h=90)

pdf.set_image_filter("LZWDecode")
pdf.image(img, x=10, y=210, w=50, h=90)

assert_pdf_equal(pdf, HERE / "image_png_indexed_transparency.pdf", tmp_path)
Loading