Skip to content

Commit

Permalink
Simplify API
Browse files Browse the repository at this point in the history
Make decode() the only method necessary
  • Loading branch information
perk11 committed Jul 20, 2016
1 parent 71914ff commit fdead42
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 55 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ This version support composer and PSR-4 autoloading. Origin code is maintained b

``` php
$reader = new \Asika\Pdf2text;
$reader->setFilename($file);
$reader->decodePDF();

$output = $reader->output();
$output = $reader->decode($fileName);
```

# License
Expand Down
110 changes: 64 additions & 46 deletions src/Pdf2text.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,39 @@
class Pdf2text
{
/**
* Use setUnicode(TRUE|FALSE)
*
* @var int
*/
protected $multibyte = 4;
private $multibyte = 4;

/**
* ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
*
* @var int
*/
protected $convertquotes = ENT_QUOTES;
private $convertquotes = ENT_QUOTES;

/**
* TRUE if you have problems with time-out
*
* @var bool
*/
protected $showprogress = false;
private $showprogress = false;

/**
* Property filename.
*
* @var string
*/
protected $filename = '';
private $filename = '';

/**
* Property decodedtext.
*
* @var string
*/
protected $decodedtext = '';
private $decodedtext = '';

/**
* Set file name.
*
* @deprecated Use "decode" method instead
* @param string $filename
*
* @return void
Expand All @@ -66,7 +62,7 @@ public function setFilename($filename)

/**
* Get output text.
*
* @deprecated Use "decode" method instead
* @param boolean $echo True to echo it.
*
* @return string
Expand All @@ -85,7 +81,7 @@ public function output($echo = false)

/**
* Using unicode.
*
* @deprecated Use "decode" method instead
* @param boolean $input True or not to use unicode.
*
* @return void
Expand All @@ -103,26 +99,74 @@ public function setUnicode($input)
}
}

/**
* Store the given value in the showprogress property and return this instance.
*
* The value is assigned as-is; this method performs no casting or validation.
* NOTE(review): the property's own comment describes the flag as
* "TRUE if you have problems with time-out" - confirm its exact effect in the
* decoding code, which is not visible from this method.
*
* @deprecated Use "decode" method instead
*
* @param boolean $showprogress Value to store in the showprogress property.
*
* @return static Return self to support chaining.
*/
public function showProgress($showprogress)
{
$this->showprogress = $showprogress;

return $this;
}

/**
* Store the given value in the convertquotes property and return this instance.
*
* The value is assigned as-is; this method performs no validation.
* The property's own comment lists the expected values as
* ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None).
*
* @deprecated Use "decode" method instead
*
* @param int $convertquotes Quote-conversion flag to store.
*
* @return static Return self to support chaining.
*/
public function convertQuotes($convertquotes)
{
$this->convertquotes = $convertquotes;

return $this;
}
/**
* Decode a PDF file and return its text in a single call.
*
* Copies the arguments into this instance's properties, runs decodePDF() and
* returns whatever output() reports afterwards.
*
* @param string $fileName         Path of the PDF file to read.
* @param int    $convertQuotes    ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
* @param bool   $showProgress     TRUE if you have problems with time-out
* @param bool   $multiByteUnicode True stores 4 in the multibyte property, false stores 2.
*
* @return string
*/
public function decode($fileName, $convertQuotes = ENT_QUOTES, $showProgress = false, $multiByteUnicode = true)
{
    $this->convertquotes = $convertQuotes;
    $this->showprogress = $showProgress;
    $this->multibyte = $multiByteUnicode ? 4 : 2;
    $this->filename = $fileName;

    // decodePDF() returns early, without assigning $this->decodedtext, when the
    // file cannot be read or is empty. Clear any text left by a previous call so
    // a reused instance does not report the earlier document for this file.
    // NOTE(review): assumes output() reads $this->decodedtext - confirm.
    $this->decodedtext = '';
    $this->decodePDF();

    return $this->output();
}

/**
* Decode PDF
*
* @deprecated Use "decode" method instead
* @return string
*/
public function decodePDF()
{
// Read the data from pdf file
$infile = @file_get_contents($this->filename, FILE_BINARY);
if (empty($infile))
$fileContents = @file_get_contents($this->filename, FILE_BINARY);
if (empty($fileContents))
{
return "";
return '';
}

// Get all text data.
$transformations = array();
$texts = array();

// Get the list of all objects.
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $infile . "endobj\r", $objects);
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $fileContents . "endobj\r", $objects);
$objects = @$objects[1];

// Select objects with streams.
Expand Down Expand Up @@ -172,9 +216,11 @@ public function decodePDF()
}
}
}

// Analyze text blocks taking into account character transformations and return results.
$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);

// Analyze text blocks taking into account character transformations and return results.
return $this->getTextUsingTransformations($texts, $transformations);
}

/**
Expand Down Expand Up @@ -625,32 +671,4 @@ private function getTextUsingTransformations($texts, $transformations)

return $document;
}

/**
* Store the given value in the showprogress property and return this instance.
*
* The value is assigned as-is; this method performs no casting or validation.
*
* @param boolean $showprogress Value to store in the showprogress property.
*
* @return static Return self to support chaining.
*/
public function showProgress($showprogress)
{
$this->showprogress = $showprogress;

return $this;
}

/**
* Store the given value in the convertquotes property and return this instance.
*
* The value is assigned as-is; this method performs no validation.
*
* @param int $convertquotes Quote-conversion flag to store.
*
* @return static Return self to support chaining.
*/
public function convertQuotes($convertquotes)
{
$this->convertquotes = $convertquotes;

return $this;
}
}
6 changes: 1 addition & 5 deletions test/Pdf2textTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ protected function setUp()
*/
public function testDecodePDF()
{
$this->instance->setFilename(__DIR__ . '/test.pdf');
$this->instance->decodePDF();

$output = $this->instance->output();
$output = $this->instance->decode(__DIR__ . '/test.pdf');

$text = <<<TXT
Nick Fury: You think you
Expand All @@ -53,4 +50,3 @@ public function testDecodePDF()
$this->assertEquals($output, $text);
}
}

0 comments on commit fdead42

Please sign in to comment.