py-pdf · Lucas-C · Oct 23, 2024 · Oct 15, 2024 · Oct 17, 2024 · Oct 17, 2024
@@ -17,6 +17,8 @@ in order to get warned about deprecated features used in your code.
 This can also be enabled programmatically with `warnings.simplefilter('default', DeprecationWarning)`.
 
 ## [2.8.2] - Not released yet
+### Added
+* support for `LZWDecode` image compression, enabled with `FPDF.set_image_filter("LZWDecode")` - [issue #1271](https://github.com/py-pdf/fpdf2/issues/1271)
 ### Fixed
 * `FPDF.set_text_shaping(False)` was broken since version 2.7.8 and is now working properly - [issue #1287](https://github.com/py-pdf/fpdf2/issues/1287)
 

@@ -32,7 +32,7 @@ class ImageSettings:
 
 
 LOGGER = logging.getLogger(__name__)
-SUPPORTED_IMAGE_FILTERS = ("AUTO", "FlateDecode", "DCTDecode", "JPXDecode")
+SUPPORTED_IMAGE_FILTERS = ("AUTO", "FlateDecode", "DCTDecode", "JPXDecode", "LZWDecode")
 SETTINGS = ImageSettings()
 
 # fmt: off
@@ -66,6 +66,11 @@ class ImageSettings:
 ]
 # fmt: on
 
+# Settings of the LZW encoder implemented by _to_lzwdata(), pack_codes_into_bytes() and clear_table().
+# Codes 0-255 stand for single bytes, so the two markers below are the first codes above that range:
+LZW_CLEAR_TABLE_MARKER = 256  # Special code to indicate table reset
+LZW_EOD_MARKER = 257  # End-of-data marker
+LZW_INITIAL_BITS_PER_CODE = 9  # Initial code bit width
+LZW_MAX_BITS_PER_CODE = 12  # Maximum code bit width
+
 
 def preload_image(image_cache: ImageCache, name, dims=None):
     """
@@ -256,6 +261,11 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
         img = img.convert("RGBA")
         img_altered = True
 
+    if img.mode in ("P", "RGBA") and image_filter == "LZWDecode":
+        img = img.convert("RGB")
+    elif img.mode in ("LA") and image_filter == "LZWDecode":
+        img = img.convert("L")
+
     w, h = img.size
     info = RasterImageInfo()
 
@@ -527,13 +537,128 @@ def __getitem__(self, tag):
     return newimgio.read(length)
 
 
+def _to_lzwdata(img, remove_slice=None, select_slice=None):
+    data = bytearray(img.tobytes())
+
+    if remove_slice:
+        del data[remove_slice]
+    if select_slice:
+        data = data[select_slice]
+
+    if img.mode == "1":
+        row_size = ceil(img.size[0] / 8)
+    else:
+        channels_count = len(data) // (img.size[0] * img.size[1])
+        row_size = img.size[0] * channels_count
+
+    data_with_padding = bytearray()
+    for i in range(0, len(data), row_size):
+        data_with_padding.extend(b"\0")
+        data_with_padding.extend(data[i : i + row_size])
+    data = data_with_padding
+    # Start compression
+
+    # The encoder shall begin by issuing a clear-table code:
+    result_codes = [LZW_CLEAR_TABLE_MARKER]
+    table, next_code, bits_per_code, max_code_value = clear_table()
+
+    current_sequence = b""
+    for byte in data:
+        next_sequence = current_sequence + bytes([byte])
+
+        if next_sequence in table:
+            # Extend current sequence if already in the table
+            current_sequence = next_sequence
+        else:
+            # Output code for the current sequence
+            result_codes.append(table[current_sequence])
+
+            # Add the new sequence to the table if there's room
+            if next_code <= (1 << LZW_MAX_BITS_PER_CODE) - 1:
+                table[next_sequence] = next_code
+                next_code += 1
+                if next_code > max_code_value and bits_per_code < LZW_MAX_BITS_PER_CODE:
+                    bits_per_code += 1
+                    max_code_value = (1 << bits_per_code) - 1
+            else:
+                # If the table is full, emit a clear-table command
+                result_codes.append(LZW_CLEAR_TABLE_MARKER)
+                table, next_code, bits_per_code, max_code_value = clear_table()
+
+            # Start new sequence
+            current_sequence = bytes([byte])
+
+    # Ensure everything actually is encoded
+    if current_sequence:
+        result_codes.append(table[current_sequence])
+
+    result_codes.append(LZW_EOD_MARKER)
+
+    return pack_codes_into_bytes(result_codes)
+
+
+def pack_codes_into_bytes(codes):
+    """
+    Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
+    The bit-width starts at 9 bits and expands as needed.
+
+    """
+
+    (
+        _,
+        next_code,
+        bits_per_code,
+        max_code_value,
+    ) = clear_table()
+    buffer = 0
+    bits_in_buffer = 0
+    output = bytearray()
+
+    for code in codes:
+        buffer = (buffer << bits_per_code) | code
+        bits_in_buffer += bits_per_code
+
+        while bits_in_buffer >= 8:
+            bits_in_buffer -= 8
+            output.append((buffer >> bits_in_buffer) & 0xFF)
+
+        if code == LZW_CLEAR_TABLE_MARKER:
+            _, next_code, bits_per_code, max_code_value = clear_table()
+        elif code != LZW_EOD_MARKER:
+            next_code += 1
+            if next_code > max_code_value and bits_per_code < LZW_MAX_BITS_PER_CODE:
+                bits_per_code += 1
+                max_code_value = (1 << bits_per_code) - 1
+
+    if bits_in_buffer > 0:
+        output.append((buffer << (8 - bits_in_buffer)) & 0xFF)
+
+    return bytes(output)
+
+
+def clear_table():
+    """
+    Reset the encoding table and coding state to initial conditions.
+
+    """
+
+    table = {bytes([i]): i for i in range(256)}
+    next_code = LZW_EOD_MARKER + 1
+    bits_per_code = LZW_INITIAL_BITS_PER_CODE
+    max_code_value = (1 << bits_per_code) - 1
+    return table, next_code, bits_per_code, max_code_value
+
+
 def _to_data(img, image_filter, **kwargs):
     if image_filter == "FlateDecode":
         return _to_zdata(img, **kwargs)
 
     if image_filter == "CCITTFaxDecode":
         return transcode_monochrome(img)
 
+    if image_filter == "LZWDecode":
+        return _to_lzwdata(img, **kwargs)
+
     if img.mode == "LA":
         img = img.convert("L")
 

@@ -32,6 +32,7 @@
         "6.7.3-7": "REASON: setting XML/XMP metadata is entirely optional with fpdf2",
         "6.7.11-1": "REASON: up to fpdf2 v2.3.2, test_xmp_metadata included the PDF/A version and conformance level of the file, but then it started to break PDF Checker does-not-conform-to-claimed-pdfa-type rule. PENDING proper support for PDF/A",
         "6.9-2": "REASON: false positive on test/signing/sign_pkcs12.pdf",
+        "6.1.10-1": "REASON: fpdf2 wants to support LZWDecode filter",
         "6.1.11-1": "REASON: /EF is allowed in order for fpdf2 to be able to embed files",
         "6.1.11-2": "REASON: /EmbeddedFiles is allowed in order for fpdf2 to be able to embed files"
     }

@@ -50,6 +50,20 @@ def test_insert_jpg_flatedecode(tmp_path):
         assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_flatedecode.pdf", tmp_path)
 
 
+def test_insert_jpg_lzwdecode(tmp_path):
+    pdf = fpdf.FPDF()
+    pdf.compress = False
+    pdf.set_image_filter("LZWDecode")
+    pdf.add_page()
+    pdf.image(HERE / "insert_images_insert_jpg.jpg", x=15, y=15, h=140)
+    if sys.platform in ("cygwin", "win32"):
+        assert_pdf_equal(
+            pdf, HERE / "image_types_insert_jpg_lzwdecode_windows.pdf", tmp_path
+        )
+    else:
+        assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_lzwdecode.pdf", tmp_path)
+
+
 def test_insert_jpg_cmyk(tmp_path):
     pdf = fpdf.FPDF()
     pdf.compress = False

@@ -24,6 +24,9 @@ def test_png_indexed_no_transparency(tmp_path):
     pdf.set_image_filter("DCTDecode")
     pdf.image(HERE / "flower1.png", x=150, y=10, w=50, h=50)
 
+    pdf.set_image_filter("LZWDecode")
+    pdf.image(HERE / "flower1.png", x=10, y=150, w=50, h=50)
+
     # PA images
     img = Image.open(HERE / "flower1.png").convert("PA")
     assert img.mode == "PA", "img.mode is not PA"
@@ -37,6 +40,9 @@ def test_png_indexed_no_transparency(tmp_path):
     pdf.set_image_filter("JPXDecode")
     pdf.image(img, x=150, y=80, w=50, h=50)
 
+    pdf.set_image_filter("LZWDecode")
+    pdf.image(img, x=10, y=220, w=50, h=50)
+
     assert_pdf_equal(pdf, HERE / "image_png_indexed_no_transparency.pdf", tmp_path)
 
 
@@ -100,4 +106,15 @@ def insert_alpha_channel_from_RGBA(img, path_png):
     pdf.set_image_filter("JPXDecode")
     pdf.image(img, x=150, y=210, w=50, h=90)
 
+    pdf.add_page()
+
+    pdf.set_image_filter("LZWDecode")
+    pdf.image(HERE / "flower2.png", x=10, y=10, w=50, h=90)
+
+    pdf.set_image_filter("LZWDecode")
+    pdf.image(HERE / "flower3.png", x=10, y=110, w=50, h=90)
+
+    pdf.set_image_filter("LZWDecode")
+    pdf.image(img, x=10, y=210, w=50, h=90)
+
     assert_pdf_equal(pdf, HERE / "image_png_indexed_transparency.pdf", tmp_path)