Skip to content

Commit

Permalink
JBIG2Decoder implementation (unidoc#67)
Browse files Browse the repository at this point in the history
* Prepared skeleton and basic component implementations for the jbig2 encoding.
* Added Bitset. Implemented Bitmap.
* Decoder with old Arithmetic Decoder
* Partly working arithmetic
* Working arithmetic decoder.
* MMR patched.
* rebuild to apache.
* Working generic
* Decoded full document
* Decoded AnnexH document
* Minor issues fixed.
* Update README.md
* Fixed generic region errors. Added benchmark. Added bitmap unpadder. Added Bitmap toImage method.
* Fixed endofpage error
* Added integration test.
* Decoded all test files without errors. Implemented JBIG2Global.
* Merged with v3 version
* Fixed the EOF in the globals issue
* Fixed the JBIG2 ChocolateData Decode
* JBIG2 Added license information
* Minor fix in jbig2 encoding.
* Applied the logging convention
* Cleaned unnecessary imports
* Go modules clear unused imports
* checked out the README.md
* Moved trace to Debug. Fixed the build integrate tag in the document_decode_test.go
* Applied UniPDF Developer Guide. Fixed lint issues.
* Cleared documentation, fixed style issues.
* Added jbig2 doc.go files. Applied unipdf guide style.
* Minor code style changes.
* Minor naming and style issues fixes.
* Minor naming changes. Style issues fixed.
* Review r11 fixes.
* Integrate jbig2 tests with build system
* Added jbig2 integration test golden files.
* Minor jbig2 integration test fix
* Removed jbig2 integration image assertions
* Fixed jbig2 rowstride issue. Implemented jbig2 bit writer
* Changed golden files logic. Fixes r13 issues.
  • Loading branch information
kucjac authored and gunnsth committed Jul 14, 2019
1 parent 0460471 commit e85616c
Show file tree
Hide file tree
Showing 76 changed files with 14,426 additions and 36 deletions.
3 changes: 3 additions & 0 deletions ACKNOWLEDGEMENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```

* [Apache Java PDFBox JBIG2 Decoder](https://github.com/apache/pdfbox-jbig2), Apache License 2.0.
- Used as a base for the JBIG2 image decoder.
160 changes: 145 additions & 15 deletions core/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ import (
lzw1 "golang.org/x/image/tiff/lzw"

"github.com/unidoc/unipdf/v3/common"

"github.com/unidoc/unipdf/v3/internal/ccittfax"
"github.com/unidoc/unipdf/v3/internal/jbig2"
)

// Stream encoding filter names.
Expand Down Expand Up @@ -1980,49 +1982,177 @@ func (enc *CCITTFaxEncoder) EncodeBytes(data []byte) ([]byte, error) {
return encoder.Encode(pixels), nil
}

// JBIG2Encoder implements JBIG2 encoder/decoder (dummy, for now)
// FIXME: implement
type JBIG2Encoder struct{}
// JBIG2Encoder is the jbig2 image encoder (WIP)/decoder.
type JBIG2Encoder struct {
// Globals are the JBIG2 global segments.
Globals jbig2.Globals
// IsChocolateData defines if the data is encoded such that
// binary data '1' means black and '0' means white.
// Otherwise the data is called vanilla.
// Naming convention taken from: 'https://en.wikipedia.org/wiki/Binary_image#Interpretation'
IsChocolateData bool
}

// NewJBIG2Encoder allocates a JBIG2Encoder and returns a pointer to it.
// All fields of the returned encoder hold their zero values.
func NewJBIG2Encoder() *JBIG2Encoder {
	return new(JBIG2Encoder)
}

// setChocolateData sets the chocolate data flag when the pdf stream object contains the 'Decode' object.
// Decode object ( PDF32000:2008 7.10.2 Type 0 (Sampled) Functions).
//
// A Decode array of exactly [1 0] sets IsChocolateData to true and exactly [0 1] sets it to false.
// Any other input (not an array, non-numeric entries, a length other than 2, other values)
// is reported at debug level and leaves the flag unchanged.
//
// NOTE: this function is a temporary helper until the samples handle Decode function.
func (enc *JBIG2Encoder) setChocolateData(decode PdfObject) {
	arr, ok := decode.(*PdfObjectArray)
	if !ok {
		common.Log.Debug("JBIG2Encoder - Decode is not an array. %T", decode)
		return
	}

	// (PDF32000:2008 Table 39) The array should be of 2 x n size.
	// For binary images n stands for 1bit, thus the array should contain 2 numbers.
	vals, err := arr.GetAsFloat64Slice()
	if err != nil {
		common.Log.Debug("JBIG2Encoder unsupported Decode value. %s", arr.String())
		return
	}

	if len(vals) != 2 {
		common.Log.Debug("JBIG2Encoder unsupported Decode array length: %d", len(vals))
		return
	}

	// Compare the floating point values directly: converting them to int first
	// truncates towards zero, so e.g. [1.5 0.7] would be mistaken for [1 0].
	switch {
	case vals[0] == 1 && vals[1] == 0:
		enc.IsChocolateData = true
	case vals[0] == 0 && vals[1] == 1:
		enc.IsChocolateData = false
	default:
		common.Log.Debug("JBIG2Encoder unsupported DecodeParams->Decode value: %s", arr.String())
	}
}

// newJBIG2EncoderFromStream builds a JBIG2Encoder configured from the provided stream object.
//
// When decodeParams is nil, the stream dictionary's 'DecodeParms' entry is used instead: either
// a dictionary, or a single-element array whose element is a dictionary. Any other non-nil
// 'DecodeParms' value results in an error.
// If the decode parameters contain a 'JBIG2Globals' entry, it must be a stream; its content is
// parsed and the resulting global segments are stored in the encoder.
// Finally the stream's 'Decode' entry, if present, is applied through setChocolateData.
func newJBIG2EncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*JBIG2Encoder, error) {
	enc := NewJBIG2Encoder()

	dict := streamObj.PdfObjectDictionary
	if dict == nil {
		// Without a stream dictionary there is nothing to configure.
		return enc, nil
	}

	// Fall back to the parameters stored in the stream when none were passed in.
	if decodeParams == nil {
		switch parms := dict.Get("DecodeParms").(type) {
		case nil:
			// No 'DecodeParms' entry - continue without decode parameters.
		case *PdfObjectDictionary:
			decodeParams = parms
		case *PdfObjectArray:
			// Only a single-element array holding a dictionary is taken into account.
			if parms.Len() == 1 {
				if first, isDict := GetDict(parms.Get(0)); isDict {
					decodeParams = first
				}
			}
		default:
			common.Log.Error("DecodeParams not a dictionary %#v", parms)
			return nil, errors.New("invalid DecodeParms")
		}
	}

	if decodeParams != nil {
		if globalsObj := decodeParams.Get("JBIG2Globals"); globalsObj != nil {
			stream, isStream := globalsObj.(*PdfObjectStream)
			if !isStream {
				err := errors.New("the Globals stream should be an Object Stream")
				common.Log.Debug("ERROR: %s", err.Error())
				return nil, err
			}

			// Parse the globals stream to extract the global segments shared with the image data.
			globalsDoc, err := jbig2.NewDocument(stream.Stream)
			if err != nil {
				err = fmt.Errorf("decoding global stream failed. %s", err.Error())
				common.Log.Debug("ERROR: %s", err)
				return nil, err
			}
			enc.Globals = globalsDoc.GlobalSegments
		}
	}

	// Invert the bits on the 'Decode [1.0 0.0]' function (PDF32000:2008 7.10.2)
	if decodeObj := streamObj.Get("Decode"); decodeObj != nil {
		enc.setChocolateData(decodeObj)
	}
	return enc, nil
}

// GetFilterName returns the name of the encoding filter.
// For the JBIG2Encoder this is always StreamEncodingFilterNameJBIG2.
func (enc *JBIG2Encoder) GetFilterName() string {
return StreamEncodingFilterNameJBIG2
}

// MakeDecodeParams makes a new instance of an encoding dictionary based on
// the current encoder settings.
// MakeDecodeParams makes a new instance of an encoding dictionary based on the current encoder settings.
func (enc *JBIG2Encoder) MakeDecodeParams() PdfObject {
return nil
return MakeDict()
}

// MakeStreamDict makes a new instance of an encoding dictionary for a stream object.
func (enc *JBIG2Encoder) MakeStreamDict() *PdfObjectDictionary {
return MakeDict()
dict := MakeDict()
if enc.IsChocolateData {
// /Decode[1.0 0.0] - see note in the 'setChocolateData' method.
dict.Set("Decode", MakeArray(MakeFloat(1.0), MakeFloat(0.0)))
}
dict.Set("Filter", MakeName(enc.GetFilterName()))
return dict
}

// UpdateParams updates the parameter values of the encoder.
// Only the 'Decode' entry of params is read; when present it is applied through
// setChocolateData. A nil params dictionary leaves the encoder unchanged.
func (enc *JBIG2Encoder) UpdateParams(params *PdfObjectDictionary) {
	// Guard against a nil dictionary before calling Get, in the same way
	// newJBIG2EncoderFromStream checks the stream dictionary.
	if params == nil {
		return
	}
	if decode := params.Get("Decode"); decode != nil {
		enc.setChocolateData(decode)
	}
}

// DecodeBytes decodes a slice of JBIG2 encoded bytes and returns the result.
// DecodeBytes decodes a slice of JBIG2 encoded bytes and returns the results.
func (enc *JBIG2Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
common.Log.Debug("Error: Attempting to use unsupported encoding %s", enc.GetFilterName())
return encoded, ErrNoJBIG2Decode
// create new JBIG2 document.
doc, err := jbig2.NewDocumentWithGlobals(encoded, enc.Globals)
if err != nil {
return nil, err
}

// the jbig2 PDF document should have only one page, where page numeration
// starts from '1'.
page, err := doc.GetPage(1)
if err != nil {
return nil, err
}
if page == nil {
err = errors.New("jbig2 corrupted data. Page#1 not found")
common.Log.Debug("ERROR: %s", err.Error())
return nil, err
}

// Get the page bitmap data.
bm, err := page.GetBitmap()
if err != nil {
return nil, err
}

// Inverse the data representation if the decoder is marked as 'isChocolateData'.
bm.InverseData(enc.IsChocolateData)

// By default the bitmap data contains the rowstride padding.
// In order to get rid of the rowstride padding use the bitmap.GetUnpaddedData method.
return bm.GetUnpaddedData()
}

// DecodeStream decodes a JBIG2 encoded stream and returns the result as a
// slice of bytes.
// DecodeStream decodes a JBIG2 encoded stream and returns the result as a slice of bytes.
func (enc *JBIG2Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
common.Log.Debug("Error: Attempting to use unsupported encoding %s", enc.GetFilterName())
return streamObj.Stream, ErrNoJBIG2Decode
return enc.DecodeBytes(streamObj.Stream)
}

// EncodeBytes JBIG2 encodes the passed in slice of bytes.
// EncodeBytes encodes the passed slice of bytes into JBIG2.
func (enc *JBIG2Encoder) EncodeBytes(data []byte) ([]byte, error) {
common.Log.Debug("Error: Attempting to use unsupported encoding %s", enc.GetFilterName())
return data, ErrNoJBIG2Decode
Expand Down
26 changes: 13 additions & 13 deletions core/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,28 +55,28 @@ func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
}
}

if *method == StreamEncodingFilterNameFlate {
switch *method {
case StreamEncodingFilterNameFlate:
return newFlateEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameLZW {
case StreamEncodingFilterNameLZW:
return newLZWEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameDCT {
case StreamEncodingFilterNameDCT:
return newDCTEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameRunLength {
case StreamEncodingFilterNameRunLength:
return newRunLengthEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameASCIIHex {
case StreamEncodingFilterNameASCIIHex:
return NewASCIIHexEncoder(), nil
} else if *method == StreamEncodingFilterNameASCII85 || *method == "A85" {
case StreamEncodingFilterNameASCII85, "A85":
return NewASCII85Encoder(), nil
} else if *method == StreamEncodingFilterNameCCITTFax {
case StreamEncodingFilterNameCCITTFax:
return newCCITTFaxEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameJBIG2 {
return NewJBIG2Encoder(), nil
} else if *method == StreamEncodingFilterNameJPX {
case StreamEncodingFilterNameJBIG2:
return newJBIG2EncoderFromStream(streamObj, nil)
case StreamEncodingFilterNameJPX:
return NewJPXEncoder(), nil
} else {
common.Log.Debug("ERROR: Unsupported encoding method!")
return nil, fmt.Errorf("unsupported encoding method (%s)", *method)
}
common.Log.Debug("ERROR: Unsupported encoding method!")
return nil, fmt.Errorf("unsupported encoding method (%s)", *method)
}

// DecodeStream decodes the stream data and returns the decoded data.
Expand Down
10 changes: 4 additions & 6 deletions core/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,10 @@ func ResolveReference(obj PdfObject) PdfObject {
return obj
}

/*
* ResolveReferencesDeep recursively traverses through object `o`, looking up and replacing
* references with indirect objects.
* Optionally a map of already deep-resolved objects can be provided via `traversed`. The `traversed` map
* is updated while traversing the objects to avoid traversing same objects multiple times.
*/
// ResolveReferencesDeep recursively traverses through object `o`, looking up and replacing
// references with indirect objects.
// Optionally a map of already deep-resolved objects can be provided via `traversed`. The `traversed` map
// is updated while traversing the objects to avoid traversing same objects multiple times.
func ResolveReferencesDeep(o PdfObject, traversed map[PdfObject]struct{}) error {
if traversed == nil {
traversed = map[PdfObject]struct{}{}
Expand Down
8 changes: 6 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ require (
github.com/boombuler/barcode v1.0.0
github.com/gunnsth/pkcs7 v0.0.0-20181213175627-3cffc6fbfe83
github.com/stretchr/testify v1.3.0
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b
golang.org/x/text v0.3.0
golang.org/x/lint v0.0.0-20190409202823-959b441ac422 // indirect
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 // indirect
golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20190606174628-0139d5756a7d // indirect
)
17 changes: 17 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,27 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c h1:Vj5n4GlwjmQteupaxJ9+0FNOmBrHfq7vN4btdGoDZgI=
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b h1:VHyIDlv3XkfCa5/a81uzaoDkHH4rr81Z62g+xlnO8uM=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422 h1:QzoH/1pFpZguR8NrRHLcO6jKqfv2zpuSqZLgdm7ZmjI=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190606174628-0139d5756a7d h1:CoaGYJ9a8IXms8Q/NUeypLWbStIszTH0IIwqBUkEB9g=
golang.org/x/tools v0.0.0-20190606174628-0139d5756a7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
Loading

0 comments on commit e85616c

Please sign in to comment.