Skip to content

Commit

Permalink
JBIG2 Encoder support for inserting binary images into PDF (unidoc#288)
Browse files Browse the repository at this point in the history
* Added JBIG2 PDF support
* Added JBIG2 Encoder binary image requirements
* PR unidoc#288 revision r1 fixes
* PR unidoc#288 revision r2 fixes
  • Loading branch information
kucjac authored Apr 3, 2020
1 parent 64a43b3 commit 29efa30
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 54 deletions.
1 change: 1 addition & 0 deletions contentstream/inline-image.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*Cont
if encoder == nil {
encoder = core.NewRawEncoder()
}
encoder.UpdateParams(img.GetParamsDict())

inlineImage := ContentStreamInlineImage{}
if img.ColorComponents == 1 {
Expand Down
120 changes: 88 additions & 32 deletions core/encoding_jbig2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
package core

import (
"bytes"
"image"
"image/color"

"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/internal/imageutil"

"github.com/unidoc/unipdf/v3/internal/jbig2"
"github.com/unidoc/unipdf/v3/internal/jbig2/bitmap"
Expand Down Expand Up @@ -55,6 +55,17 @@ const JB2ImageAutoThreshold = -1.0
// The similarity is defined by the 'Threshold' variable (default: 0.95). The lower the value, the more components
// match a single class, so the compression is better, but the result might become lossy.
type JBIG2Encoder struct {
// These values are required to be set for the 'EncodeBytes' method.
// ColorComponents defines the number of color components for provided image.
ColorComponents int
// BitsPerComponent is the number of bits stored per color component.
BitsPerComponent int
// Width is the width of the image to encode
Width int
// Height is the height of the image to encode.
Height int

// Encode Page and Decode parameters
d *document.Document
// Globals are the JBIG2 global segments.
Globals jbig2.Globals
Expand All @@ -69,7 +80,9 @@ type JBIG2Encoder struct {

// NewJBIG2Encoder creates a new JBIG2Encoder.
// The returned encoder already holds an encode document created with
// document.InitEncodeDocument(false); EncodeBytes adds its page to that document,
// so the field must not be left nil.
// NOTE(review): the 'false' argument presumably selects the PDF-embedded mode
// (no full file headers) - confirm against document.InitEncodeDocument.
func NewJBIG2Encoder() *JBIG2Encoder {
	return &JBIG2Encoder{
		d: document.InitEncodeDocument(false),
	}
}

// AddPageImage adds the page with the image 'img' to the encoder context in order to encode it into a jbig2 document.
Expand Down Expand Up @@ -165,25 +178,47 @@ func (enc *JBIG2Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error
// to encode given image.
func (enc *JBIG2Encoder) EncodeBytes(data []byte) ([]byte, error) {
const processName = "JBIG2Encoder.EncodeBytes"
if len(data) == 0 {
return nil, errors.Errorf(processName, "input 'data' not defined")
if enc.ColorComponents != 1 || enc.BitsPerComponent != 1 {
return nil, errors.Errorf(processName, "provided invalid input image. JBIG2 Encoder requires binary images data")
}
i, _, err := image.Decode(bytes.NewReader(data))
b, err := bitmap.NewWithUnpaddedData(enc.Width, enc.Height, data)
if err != nil {
return nil, errors.Wrap(err, processName, "decode input image")
return nil, err
}
encoded, err := enc.encodeImage(i)
if err != nil {
settings := enc.DefaultPageSettings
if err = settings.Validate(); err != nil {
return nil, errors.Wrap(err, processName, "")
}
return encoded, nil

switch settings.Compression {
case JB2Generic:
if err = enc.d.AddGenericPage(b, settings.DuplicatedLinesRemoval); err != nil {
return nil, errors.Wrap(err, processName, "")
}
case JB2SymbolCorrelation:
return nil, errors.Error(processName, "symbol correlation encoding not implemented yet")
case JB2SymbolRankHaus:
return nil, errors.Error(processName, "symbol rank haus encoding not implemented yet")
default:
return nil, errors.Error(processName, "provided invalid compression")
}
return enc.Encode()
}

// EncodeImage encodes the Go image.Image 'img' into a jbig2 encoded byte document using default encoder settings.
// It delegates to the unexported encodeImage method and returns its result and error unchanged.
func (enc *JBIG2Encoder) EncodeImage(img image.Image) ([]byte, error) {
return enc.encodeImage(img)
}

// EncodeJBIG2Image adds 'img' as a page using the encoder's default page settings
// and returns the jbig2 encoded byte stream produced by Encode.
// An error reported while adding the page is wrapped with the process name.
func (enc *JBIG2Encoder) EncodeJBIG2Image(img *JBIG2Image) ([]byte, error) {
	const processName = "core.EncodeJBIG2Image"

	pageSettings := &enc.DefaultPageSettings
	addErr := enc.AddPageImage(img, pageSettings)
	if addErr != nil {
		return nil, errors.Wrap(addErr, processName, "")
	}
	return enc.Encode()
}

// Encode encodes the previously prepared jbig2 document and returns it as a byte slice.
func (enc *JBIG2Encoder) Encode() (data []byte, err error) {
const processName = "JBIG2Document.Encode"
Expand Down Expand Up @@ -217,8 +252,24 @@ func (enc *JBIG2Encoder) MakeStreamDict() *PdfObjectDictionary {
}

// UpdateParams updates the parameter values of the encoder.
// It reads the 'BitsPerComponent', 'Width', 'Height' and 'ColorComponents' entries
// of 'params' and stores each of them in the matching encoder field.
// An entry for which GetNumberAsInt64 reports an error leaves the field untouched.
// A nil 'params' is a no-op.
// Implements StreamEncoder interface.
func (enc *JBIG2Encoder) UpdateParams(params *PdfObjectDictionary) {
	// Nothing to read from - keep the current values instead of dereferencing a nil dictionary.
	if params == nil {
		return
	}
	if bpc, err := GetNumberAsInt64(params.Get("BitsPerComponent")); err == nil {
		enc.BitsPerComponent = int(bpc)
	}
	if width, err := GetNumberAsInt64(params.Get("Width")); err == nil {
		enc.Width = int(width)
	}
	if height, err := GetNumberAsInt64(params.Get("Height")); err == nil {
		enc.Height = int(height)
	}
	if colorComponents, err := GetNumberAsInt64(params.Get("ColorComponents")); err == nil {
		enc.ColorComponents = int(colorComponents)
	}
}

func (enc *JBIG2Encoder) encodeImage(i image.Image) ([]byte, error) {
Expand Down Expand Up @@ -262,24 +313,29 @@ func newJBIG2DecoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObje
}
}
}

if decodeParams != nil {
if globals := decodeParams.Get("JBIG2Globals"); globals != nil {
var err error

globalsStream, ok := globals.(*PdfObjectStream)
if !ok {
err = errors.Error(processName, "jbig2.Globals stream should be an Object Stream")
common.Log.Debug("ERROR: %s", err.Error())
return nil, err
}
encoder.Globals, err = jbig2.DecodeGlobals(globalsStream.Stream)
if err != nil {
err = errors.Wrap(err, processName, "corrupted jbig2 encoded data")
common.Log.Debug("ERROR: %s", err)
return nil, err
}
}
// if no decode params provided - end fast.
if decodeParams == nil {
return encoder, nil
}
// set image parameters.
encoder.UpdateParams(decodeParams)
globals := decodeParams.Get("JBIG2Globals")
if globals == nil {
return encoder, nil
}
// decode and set JBIG2 Globals.
var err error
globalsStream, ok := globals.(*PdfObjectStream)
if !ok {
err = errors.Error(processName, "jbig2.Globals stream should be an Object Stream")
common.Log.Debug("ERROR: %v", err)
return nil, err
}
encoder.Globals, err = jbig2.DecodeGlobals(globalsStream.Stream)
if err != nil {
err = errors.Wrap(err, processName, "corrupted jbig2 encoded data")
common.Log.Debug("ERROR: %v", err)
return nil, err
}
return encoder, nil
}
Expand Down Expand Up @@ -348,17 +404,17 @@ func GoImageToJBIG2(i image.Image, bwThreshold float64) (*JBIG2Image, error) {
var th uint8
if bwThreshold == JB2ImageAutoThreshold {
// autoThreshold using triangle method
gray := bitmap.ImgToGray(i)
histogram := bitmap.GrayImageHistogram(gray)
th = bitmap.AutoThresholdTriangle(histogram)
gray := imageutil.ImgToGray(i)
histogram := imageutil.GrayImageHistogram(gray)
th = imageutil.AutoThresholdTriangle(histogram)
i = gray
} else if bwThreshold > 1.0 || bwThreshold < 0.0 {
// any other threshold outside of the allowed range [0.0, 1.0] is rejected.
return nil, errors.Error(processName, "provided threshold is not in a range {0.0, 1.0}")
} else {
th = uint8(255 * bwThreshold)
}
gray := bitmap.ImgToBinary(i, th)
gray := imageutil.ImgToBinary(i, th)
return bwToJBIG2Image(gray), nil
}

Expand Down
12 changes: 11 additions & 1 deletion creator/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ func (img *Image) GetMargins() (float64, float64, float64, float64) {
return img.margins.left, img.margins.right, img.margins.top, img.margins.bottom
}

// ConvertToBinary converts current image data into binary (Bi-level image) format.
// If provided image is RGB or GrayScale the function converts it into binary image
// using histogram auto threshold method.
// The conversion is delegated to the ConvertToBinary method of the wrapped 'img.img'
// image and the error it reports is returned unchanged.
func (img *Image) ConvertToBinary() error {
return img.img.ConvertToBinary()
}

// makeXObject makes the encoded XObject Image that will be used in the PDF.
func (img *Image) makeXObject() error {
encoder := img.encoder
Expand All @@ -181,7 +188,10 @@ func (img *Image) makeXObject() error {
func (img *Image) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
if img.xobj == nil {
// Build the XObject Image if not already prepared.
img.makeXObject()
if err := img.makeXObject(); err != nil {
return nil, ctx, err
}

}

var blocks []*Block
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ github.com/boombuler/barcode v1.0.0 h1:s1TvRnXwL2xJRaccrdcBQMZxq6X7DvsMogtmJeHDd
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/gunnsth/pkcs7 v0.0.0-20181213175627-3cffc6fbfe83 h1:saj5dTV7eQ1wFg/gVZr1SfbkOmg8CYO9R8frHgQiyR4=
github.com/gunnsth/pkcs7 v0.0.0-20181213175627-3cffc6fbfe83/go.mod h1:xaGEIRenAiJcGgd9p62zbiP4993KaV3PdjczwGnP50I=
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package bitmap
package imageutil

import (
"image"
Expand Down Expand Up @@ -131,6 +131,11 @@ func ImgToGray(i image.Image) *image.Gray {
return g
}

// IsGrayImgBlackAndWhite reports whether the provided gray image is black and white, i.e. a binary image.
// It is the exported entry point for the package-private isGrayBlackWhite check and returns its result unchanged.
func IsGrayImgBlackAndWhite(i *image.Gray) bool {
return isGrayBlackWhite(i)
}

func blackOrWhite(c, threshold uint8) uint8 {
if c < threshold {
return 255
Expand Down
16 changes: 7 additions & 9 deletions internal/jbig2/bitmap/bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ func (b *Bitmap) addBorderGeneral(left, right, top, bot int, val int) (*Bitmap,

// addPadBits creates new data byte slice that contains extra padding on the last byte for each row.
func (b *Bitmap) addPadBits() (err error) {
const processName = "addPadBits"
const processName = "bitmap.addPadBits"
endbits := b.Width % 8
if endbits == 0 {
// no partial words
Expand All @@ -559,18 +559,16 @@ func (b *Bitmap) addPadBits() (err error) {
w := writer.NewMSB(data)
temp := make([]byte, fullBytes)
var (
i, j int
i int
bits uint64
)
for i = 0; i < b.Height; i++ {
// iterate over full bytes
for j = 0; j < fullBytes; j++ {
if _, err = r.Read(temp); err != nil {
return errors.Wrap(err, processName, "full byte")
}
if _, err = w.Write(temp); err != nil {
return errors.Wrap(err, processName, "full bytes")
}
if _, err = r.Read(temp); err != nil {
return errors.Wrap(err, processName, "full byte")
}
if _, err = w.Write(temp); err != nil {
return errors.Wrap(err, processName, "full bytes")
}
// read unused bits
if bits, err = r.ReadBits(byte(endbits)); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion internal/jbig2/document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func (d *Document) AddGenericPage(bm *bitmap.Bitmap, duplicateLineRemoval bool)
const processName = "Document.AddGenericPage"
// check if this is PDFMode and there is already a page
if !d.FullHeaders && d.NumberOfPages != 0 {
return errors.Error(processName, "document already contains page. FileMode disallows addoing more than one page")
return errors.Error(processName, "document already contains page. FileMode disallows adding more than one page")
}
// initialize page
page := &Page{
Expand Down
1 change: 1 addition & 0 deletions internal/jbig2/tests/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ jbig2files
.test
*.jbig2
.envrc
.env
Loading

0 comments on commit 29efa30

Please sign in to comment.