Skip to content

Commit

Permalink
Fix stream parsing bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Hopding committed Jul 6, 2019
1 parent 18878dc commit 764fb4b
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/core/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ export class MissingCatalogError extends Error {

/***** Parser Errors ******/

interface Position {
export interface Position {
line: number;
column: number;
offset: number;
Expand Down
25 changes: 21 additions & 4 deletions src/core/parser/PDFObjectParser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {
PDFObjectParsingError,
PDFStreamParsingError,
Position,
UnbalancedParenthesisError,
} from 'src/core/errors';
import PDFArray from 'src/core/objects/PDFArray';
Expand Down Expand Up @@ -221,9 +222,27 @@ class PDFObjectParser extends BaseParser {
}

const start = this.bytes.offset();
let end: number;

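// Prefer the dict's `Length` entry to find the end of the stream. If it is not
// a PDFNumber, or `endstream` does not follow the indicated end, use
// findEndOfStreamFallback instead.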
const Length = dict.get(PDFName.of('Length'));
if (Length instanceof PDFNumber) {
end = start + Length.value();
this.bytes.moveTo(end);
this.skipWhitespaceAndComments();
if (!this.matchKeyword(Keywords.endstream)) {
this.bytes.moveTo(start);
end = this.findEndOfStreamFallback(startPos);
}
} else {
end = this.findEndOfStreamFallback(startPos);
}

const contents = this.bytes.slice(start, end);

return PDFRawStream.of(dict, contents);
}

protected findEndOfStreamFallback(startPos: Position) {
// Move to end of stream, while handling nested streams
let nestingLvl = 1;
let end = this.bytes.offset();
Expand All @@ -249,9 +268,7 @@ class PDFObjectParser extends BaseParser {

if (nestingLvl !== 0) throw new PDFStreamParsingError(startPos);

const contents = this.bytes.slice(start, end);

return PDFRawStream.of(dict, contents);
return end;
}
}

Expand Down
6 changes: 6 additions & 0 deletions src/core/parser/PDFParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,14 @@ class PDFParser extends PDFObjectParser {
private tryToParseInvalidIndirectObject() {
const startPos = this.bytes.position();

console.warn(
`Trying to parse invalid object: ${JSON.stringify(startPos)})`,
);

const ref = this.parseIndirectObjectHeader();

console.warn(`Invalid object ref: ${ref}`);

this.skipWhitespaceAndComments();
const start = this.bytes.offset();

Expand Down
6 changes: 5 additions & 1 deletion src/core/structures/PDFObjectStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class PDFObjectStream extends PDFFlateStream {
encode = true,
) => new PDFObjectStream(context, objects, encode);

readonly objects: IndirectObject[];
private readonly objects: IndirectObject[];
private readonly offsets: Array<[number, number]>;
private readonly offsetsString: string;

Expand All @@ -36,6 +36,10 @@ class PDFObjectStream extends PDFFlateStream {
this.dict.set(PDFName.of('First'), PDFNumber.of(this.offsetsString.length));
}

/**
 * Reports how many entries this object stream's `objects` array holds.
 *
 * @returns The length of `this.objects`.
 */
getObjectsCount(): number {
  const { length: objectCount } = this.objects;
  return objectCount;
}

clone(context?: PDFContext): PDFObjectStream {
return PDFObjectStream.withContextAndObjects(
context || this.dict.context,
Expand Down
2 changes: 2 additions & 0 deletions src/core/writers/PDFStreamWriter.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import PDFHeader from 'src/core/document/PDFHeader';
import PDFTrailer from 'src/core/document/PDFTrailer';
import PDFInvalidObject from 'src/core/objects/PDFInvalidObject';
import PDFName from 'src/core/objects/PDFName';
import PDFNumber from 'src/core/objects/PDFNumber';
import PDFObject from 'src/core/objects/PDFObject';
Expand Down Expand Up @@ -64,6 +65,7 @@ class PDFStreamWriter extends PDFWriter {
const shouldNotCompress =
ref === this.context.trailerInfo.Encrypt ||
object instanceof PDFStream ||
object instanceof PDFInvalidObject ||
ref.generationNumber !== 0;

if (shouldNotCompress) {
Expand Down
3 changes: 2 additions & 1 deletion src/core/writers/PDFWriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ class PDFWriter {
buffer[offset++] = CharCodes.Newline;
buffer[offset++] = CharCodes.Newline;

const n = object instanceof PDFObjectStream ? object.objects.length : 1;
const n =
object instanceof PDFObjectStream ? object.getObjectsCount() : 1;
if (this.shouldWaitForTick(n)) await waitForTick();
}

Expand Down
2 changes: 1 addition & 1 deletion tests/core/parser/PDFParser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ describe(`PDFParser`, () => {

expect(context.header).toBeInstanceOf(PDFHeader);
expect(context.header.toString()).toEqual('%PDF-1.6\n%');
expect(context.enumerateIndirectObjects().length).toBe(208);
expect(context.enumerateIndirectObjects().length).toBe(17);
});

it(`can fix incorrect values for /Root`, async () => {
Expand Down

0 comments on commit 764fb4b

Please sign in to comment.