Skip to content

Commit

Permalink
Merge branch 'development' of https://github.com/unidoc/unipdf into c…
Browse files Browse the repository at this point in the history
…olumns
  • Loading branch information
peterwilliams97 committed Jun 22, 2020
2 parents e6be021 + 7bf2f62 commit a7779a3
Show file tree
Hide file tree
Showing 23 changed files with 1,008 additions and 158 deletions.
26 changes: 12 additions & 14 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
node {
// Install the desired Go version
def root = tool name: 'go 1.11.5', type: 'go'
def root = tool name: 'go 1.14.3', type: 'go'

env.GOROOT="${root}"
env.GOPATH="${WORKSPACE}/gopath"
// Hack for 1.11.5 testing work.
env.CGO_ENABLED="0"
env.PATH="${root}/bin:${env.GOPATH}/bin:${env.PATH}"
env.GOCACHE="off"
env.GOBIN="${WORKSPACE}/bin"
env.PATH="${root}/bin:${env.GOBIN}:${env.PATH}"
env.UNIDOC_EXTRACT_FORCETEST="1"
env.UNIDOC_E2E_FORCE_TESTS="1"
env.UNIDOC_EXTRACT_TESTDATA="/home/jenkins/corpus/unidoc-extractor-testdata"
Expand All @@ -19,13 +16,13 @@ node {
env.UNIDOC_JBIG2_TESTDATA="/home/jenkins/corpus/jbig2-testdata"
env.UNIDOC_FDFMERGE_TESTDATA="/home/jenkins/corpus/fdfmerge-testdata"
env.UNIDOC_GS_BIN_PATH="/usr/bin/gs"
// Hack for 1.11.5 testing work.
env.CGO_ENABLED="0"

env.TMPDIR="${WORKSPACE}/temp"
sh "mkdir -p ${env.GOBIN}"
sh "mkdir -p ${env.TMPDIR}"

dir("${GOPATH}/src/github.com/unidoc/unipdf") {
dir("${WORKSPACE}/unipdf") {
sh 'go version'

stage('Checkout') {
Expand All @@ -35,11 +32,9 @@ node {

stage('Prepare') {
// Get linter and other build tools.
sh 'go get -u golang.org/x/lint/golint'
sh 'go get golang.org/x/lint/golint'
sh 'go get github.com/tebeka/go2xunit'
sh 'go get github.com/t-yuki/gocover-cobertura'
// Get all dependencies (for tests also).
sh 'go get -t ./...'
}

stage('Linting') {
Expand All @@ -53,7 +48,7 @@ node {
stage('Testing') {
// Go test - No tolerance.
sh "rm -f ${env.TMPDIR}/*.pdf"
sh '2>&1 go test -v ./... | tee gotest.txt'
sh '2>&1 go test -count=1 -v ./... | tee gotest.txt'
}

stage('Check generated PDFs') {
Expand All @@ -62,7 +57,7 @@ node {
}

stage('Test coverage') {
sh 'go test -coverprofile=coverage.out -covermode=atomic -coverpkg=./... ./...'
sh 'go test -count=1 -coverprofile=coverage.out -covermode=atomic -coverpkg=./... ./...'
sh '/home/jenkins/codecov.sh'
sh 'gocover-cobertura < coverage.out > coverage.xml'
step([$class: 'CoberturaPublisher', coberturaReportFile: 'coverage.xml'])
Expand All @@ -80,7 +75,7 @@ node {
}
}

dir("${GOPATH}/src/github.com/unidoc/unipdf-examples") {
dir("${WORKSPACE}/unipdf-examples") {
stage('Build examples') {
// Output environment variables (useful for debugging).
sh("printenv")
Expand All @@ -97,6 +92,9 @@ node {

echo "Pulling unipdf-examples on branch ${examplesBranch}"
git url: 'https://github.com/unidoc/unidoc-examples.git', branch: examplesBranch

// Use replace directive to use disk version of unipdf.
sh 'echo "replace github.com/unidoc/unipdf/v3 => ../unipdf" >>go.mod'

// Dependencies for examples.
sh './build_examples.sh'
Expand Down
10 changes: 5 additions & 5 deletions common/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ import (
)

const releaseYear = 2020
const releaseMonth = 4
const releaseDay = 23
const releaseHour = 1
const releaseMin = 30
const releaseMonth = 6
const releaseDay = 15
const releaseHour = 20
const releaseMin = 15

// Version holds version information. When bumping this, make sure to also bump the ReleasedAt timestamp.
const Version = "3.6.2"
const Version = "3.8.0"

var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)
21 changes: 14 additions & 7 deletions core/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,6 @@ func newDCTEncoderFromStream(streamObj *PdfObjectStream, multiEnc *MultiEncoder)
return nil, err
}
encoded = e

}

bufReader := bytes.NewReader(encoded)
Expand Down Expand Up @@ -2158,6 +2157,9 @@ func newMultiEncoderFromStream(streamObj *PdfObjectStream) (*MultiEncoder, error

// GetFilterName returns the names of the underlying encoding filters,
// separated by spaces.
// Note: The returned value is only a display string and should not be used as the /Filter dictionary entry; use GetFilterArray for that.
// TODO(v4): Refactor to GetFilter() which can be used for /Filter (either Name or Array), this can be
// renamed to String() as a pretty string to use in debugging etc.
func (enc *MultiEncoder) GetFilterName() string {
name := ""
for idx, encoder := range enc.encoders {
Expand All @@ -2169,6 +2171,16 @@ func (enc *MultiEncoder) GetFilterName() string {
return name
}

// GetFilterArray builds a PDF array holding one name object per underlying
// encoder, in the order the encoders are stored. The result is suitable for
// use as the /Filter entry of a stream dictionary.
func (enc *MultiEncoder) GetFilterArray() *PdfObjectArray {
	filters := make([]PdfObject, 0, len(enc.encoders))
	for _, encoder := range enc.encoders {
		filters = append(filters, MakeName(encoder.GetFilterName()))
	}
	return MakeArray(filters...)
}

// MakeDecodeParams makes a new instance of an encoding dictionary based on
// the current encoder settings.
func (enc *MultiEncoder) MakeDecodeParams() PdfObject {
Expand Down Expand Up @@ -2201,12 +2213,7 @@ func (enc *MultiEncoder) AddEncoder(encoder StreamEncoder) {
// MakeStreamDict makes a new instance of an encoding dictionary for a stream object.
func (enc *MultiEncoder) MakeStreamDict() *PdfObjectDictionary {
dict := MakeDict()

names := make([]PdfObject, len(enc.encoders))
for i, e := range enc.encoders {
names[i] = MakeName(e.GetFilterName())
}
dict.Set("Filter", MakeArray(names...))
dict.Set("Filter", enc.GetFilterArray())

// Pass all values from children, except Filter and DecodeParms.
for _, encoder := range enc.encoders {
Expand Down
71 changes: 71 additions & 0 deletions creator/creator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3067,6 +3067,77 @@ func TestPageLabels(t *testing.T) {
require.Equal(t, core.EqualObjects(genPageLabels, pageLabels), true)
}

// TestReferencedPageDestinations writes a document with a table of contents,
// then writes a second document containing only every other page of the first,
// and checks the outline destinations of both outputs.
func TestReferencedPageDestinations(t *testing.T) {
	// checkDests parses the PDF held in data and asserts the number of pages,
	// the number of outline entries whose destination resolves to a page, and
	// the number of outline entries whose destination page is a null object.
	checkDests := func(data *bytes.Buffer, wantPages, wantNullDests int) {
		pdfReader, err := model.NewPdfReader(bytes.NewReader(data.Bytes()))
		require.NoError(t, err)

		// Check number of pages in catalog.
		pageCount, err := pdfReader.GetNumPages()
		require.NoError(t, err)
		require.Equal(t, wantPages, pageCount)

		// Check outline destination pages.
		outlines, err := pdfReader.GetOutlines()
		require.NoError(t, err)

		nullDests, validDests := 0, 0
		for _, entry := range outlines.Entries {
			destPage := entry.Dest.PageObj
			require.NotNil(t, destPage)

			if !core.IsNullObject(destPage) {
				// A non-null destination must resolve to a page of the document.
				_, _, err := pdfReader.PageFromIndirectObject(destPage)
				require.NoError(t, err)
				validDests++
			} else {
				nullDests++
			}
		}

		require.Equal(t, wantPages, validDests)
		require.Equal(t, wantNullDests, nullDests)
	}

	// Generate and test input file: one chapter per page, plus a TOC.
	c := New()
	c.AddTOC = true

	chapterCount := 10
	for n := 1; n <= chapterCount; n++ {
		chapter := c.NewChapter(fmt.Sprintf("Chapter %d", n))
		chapter.Add(c.NewParagraph(fmt.Sprintf("Content for chapter %d", n)))
		require.NoError(t, c.Draw(chapter))

		// Start a fresh page for every chapter except the last one.
		if n < chapterCount {
			c.NewPage()
		}
	}

	fullDoc := bytes.NewBuffer(nil)
	require.NoError(t, c.Write(fullDoc))
	checkDests(fullDoc, 11, 0)

	// Generate and test split input file: keep only the pages at even indices,
	// but carry over the complete outline tree of the source document.
	srcReader, err := model.NewPdfReader(bytes.NewReader(fullDoc.Bytes()))
	require.NoError(t, err)

	writer := model.NewPdfWriter()
	for idx, page := range srcReader.PageList {
		if idx%2 != 0 {
			continue
		}
		require.NoError(t, writer.AddPage(page))
	}
	writer.AddOutlineTree(srcReader.GetOutlineTree())

	splitDoc := bytes.NewBuffer(nil)
	require.NoError(t, writer.Write(splitDoc))
	// Expect 6 pages kept and 5 outline entries with null destination pages.
	checkDests(splitDoc, 6, 5)
}

var errRenderNotSupported = errors.New("rendering pdf is not supported on this system")

// renderPDFToPNGs uses ghostscript (gs) to render specified PDF file into a set of PNG images (one per page).
Expand Down
12 changes: 6 additions & 6 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ func New(page *model.PdfPage) (*Extractor, error) {
// fmt.Printf("%s\n", contents)
// fmt.Println("========================= ::: =========================")

mediaBox, err := page.GetMediaBox()
if err != nil {
return nil, err
}
return NewFromContents(contents, page.Resources)
}

// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents(contents string, resources *model.PdfPageResources) (*Extractor, error) {
e := &Extractor{
contents: contents,
resources: page.Resources,
mediaBox: *mediaBox,
resources: resources,
fontCache: map[string]fontEntry{},
formResults: map[string]textResult{},
}
Expand Down
9 changes: 8 additions & 1 deletion extractor/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,11 @@ func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error {
common.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args)
return core.ErrTypeError
}
to.renderText(charcodes)
err := to.renderText(charcodes)
if err != nil {
common.Log.Debug("Render text error: %v", err)
return err
}
default:
common.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", o, args)
return core.ErrTypeError
Expand Down Expand Up @@ -795,6 +799,7 @@ func (to *textObject) renderText(data []byte) error {
continue
}

// TODO(gunnsth): Assuming 1:1 charcode[i] <-> rune[i] mapping.
code := charcodes[i]
// The location of the text on the page in device coordinates is given by trm, the text
// rendering matrix.
Expand Down Expand Up @@ -858,6 +863,8 @@ func (to *textObject) renderText(data []byte) error {
} else if font.Encoder() == nil {
common.Log.Debug("ERROR: No encoding. font=%s", font)
} else {
// TODO: This lookup seems confusing. Went from bytes <-> charcodes already.
// NOTE: This is needed to register runes by the font encoder - for subsetting (optimization).
original, ok := font.Encoder().CharcodeToRune(code)
if ok {
mark.original = string(original)
Expand Down
4 changes: 1 addition & 3 deletions extractor/text_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ var doStress bool
func init() {
flag.BoolVar(&doStress, "extractor-stresstest", false, "Run text extractor stress tests.")
common.SetLogger(common.NewConsoleLogger(common.LogLevelInfo))
if flag.Lookup("test.v") != nil || true {
isTesting = true
}
isTesting = true
}

// TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`.
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ require (
github.com/adrg/sysfont v0.1.0
github.com/boombuler/barcode v1.0.0
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0
github.com/sirupsen/logrus v1.6.0 // indirect
github.com/stretchr/testify v1.4.0
github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a
github.com/unidoc/unitype v0.1.0
github.com/unidoc/unitype v0.2.0
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1 // indirect
golang.org/x/text v0.3.2
)
10 changes: 8 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
Expand All @@ -24,6 +26,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q=
github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo=
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
Expand All @@ -32,8 +36,8 @@ github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df h1:1RV3lxQ6L6xGFNhngp
github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8=
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl66m7MJ8OqBjq8jpWBXPK6/RKtqeTkc=
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw=
github.com/unidoc/unitype v0.1.0 h1:6zJYMl8XdwFBD45Cmg8Ge13WyE92jwLuK1tk2IsRb9s=
github.com/unidoc/unitype v0.1.0/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU=
github.com/unidoc/unitype v0.2.0 h1:N+ZKjwz8UDU0qa1IYzstDLffvQEctFo+bo6b6ZqW+9M=
github.com/unidoc/unitype v0.2.0/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
Expand All @@ -45,6 +49,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY=
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1 h1:ogLJMz+qpzav7lGMh10LMvAkM/fAoGlaiiHYiFYdm80=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
Expand Down
Loading

0 comments on commit a7779a3

Please sign in to comment.