Skip to content

Commit

Permalink
Merge pull request #3 from perk11/modernise
Browse files Browse the repository at this point in the history
Slight Modernisation
  • Loading branch information
asika32764 authored Jul 20, 2016
2 parents a382f95 + fdead42 commit dc11795
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 81 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ This version support composer and PSR-4 autoloading. Origin code is maintained b

``` php
$reader = new \Asika\Pdf2text;
$reader->setFilename($file);
$reader->decodePDF();

$output = $reader->output();
$output = $reader->decode($fileName);
```

# License
Expand Down
162 changes: 90 additions & 72 deletions src/Pdf2text.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,39 @@
class Pdf2text
{
/**
* Use setUnicode(TRUE|FALSE)
*
* @var int
*/
protected $multibyte = 4;
private $multibyte = 4;

/**
* ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
*
* @var int
*/
protected $convertquotes = ENT_QUOTES;
private $convertquotes = ENT_QUOTES;

/**
* TRUE if you have problems with time-out
*
* @var bool
*/
protected $showprogress = false;
private $showprogress = false;

/**
* Property filename.
*
* @var string
*/
protected $filename = '';
private $filename = '';

/**
* Property decodedtext.
*
* @var string
*/
protected $decodedtext = '';
private $decodedtext = '';

/**
* Set file name.
*
* @deprecated Use "decode" method instead
* @param string $filename
*
* @return void
Expand All @@ -66,7 +62,7 @@ public function setFilename($filename)

/**
* Get output text.
*
* @deprecated Use "decode" method instead
* @param boolean $echo True to echo it.
*
* @return string
Expand All @@ -85,15 +81,15 @@ public function output($echo = false)

/**
* Using unicode.
*
* @deprecated Use "decode" method instead
* @param boolean $input True or not to use unicode.
*
* @return void
*/
public function setUnicode($input)
{
// 4 for unicode. But 2 should work in most cases just fine
if ($input == true)
if ($input)
{
$this->multibyte = 4;
}
Expand All @@ -103,26 +99,74 @@ public function setUnicode($input)
}
}

/**
* Set the showprogress property and return the same instance.
*
* The value is stored as given; no casting or validation is applied here.
* The property itself is described in this class as "TRUE if you have
* problems with time-out".
*
* @deprecated Use "decode" method instead (it accepts $showProgress as its third argument)
* @param boolean $showprogress Value to store in the showprogress property.
*
* @return static Return self to support chaining.
*/
public function showProgress($showprogress)
{
$this->showprogress = $showprogress;

return $this;
}

/**
* Set the convertquotes property and return the same instance.
*
* The value is stored as given; no validation is applied here. The property
* is documented in this class as one of ENT_COMPAT (double-quotes),
* ENT_QUOTES (Both) or ENT_NOQUOTES (None).
*
* @deprecated Use "decode" method instead (it accepts $convertQuotes as its second argument)
* @param int $convertquotes Value to store in the convertquotes property.
*
* @return static Return self to support chaining.
*/
public function convertQuotes($convertquotes)
{
$this->convertquotes = $convertquotes;

return $this;
}
/**
 * Decode a PDF file and return its text.
 *
 * Stores the given options on the instance, runs decodePDF() against
 * $fileName and returns the result of output().
 *
 * @param string $fileName         File to read; passed on to decodePDF() through the filename property.
 * @param int    $convertQuotes    ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
 * @param bool   $showProgress     TRUE if you have problems with time-out
 * @param bool   $multiByteUnicode True stores 4 in the multibyte property, false stores 2.
 *
 * @return string
 */
public function decode($fileName, $convertQuotes = ENT_QUOTES, $showProgress = false, $multiByteUnicode = true)
{
    $this->convertquotes = $convertQuotes;
    $this->showprogress  = $showProgress;
    $this->multibyte     = $multiByteUnicode ? 4 : 2;
    $this->filename      = $fileName;

    // decodePDF() returns early, without assigning decodedtext, when the file
    // cannot be read or is empty. Clear the previous result first so that a
    // reused instance never hands back text left over from an earlier file.
    // NOTE(review): assumes output() returns the decodedtext property; its
    // body is not visible in this view - confirm.
    $this->decodedtext = '';

    $this->decodePDF();

    return $this->output();
}

/**
* Decode PDF
*
* @deprecated Use "decode" method instead
* @return string
*/
public function decodePDF()
{
// Read the data from pdf file
$infile = @file_get_contents($this->filename, FILE_BINARY);
if (empty($infile))
$fileContents = @file_get_contents($this->filename, FILE_BINARY);
if (empty($fileContents))
{
return "";
return '';
}

// Get all text data.
$transformations = array();
$texts = array();

// Get the list of all objects.
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $infile . "endobj\r", $objects);
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $fileContents . "endobj\r", $objects);
$objects = @$objects[1];

// Select objects with streams.
Expand Down Expand Up @@ -172,9 +216,11 @@ public function decodePDF()
}
}
}

// Analyze text blocks taking into account character transformations and return results.
$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);

// Analyze text blocks taking into account character transformations and return results.
return $this->getTextUsingTransformations($texts, $transformations);
}

/**
Expand All @@ -184,19 +230,19 @@ public function decodePDF()
*
* @return string
*/
public function decodeAsciiHex($input)
private function decodeAsciiHex($input)
{
$output = "";
$isOdd = true;
$isComment = false;

for ($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] != '>'; $i++)
for ($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] !== '>'; $i++)
{
$c = $input[$i];

if ($isComment)
{
if ($c == '\r' || $c == '\n')
if ($c === '\r' || $c === '\n')
{
$isComment = false;
}
Expand Down Expand Up @@ -237,7 +283,7 @@ public function decodeAsciiHex($input)
}
}

if ($input[$i] != '>')
if ($input[$i] !== '>')
{
return "";
}
Expand All @@ -257,36 +303,36 @@ public function decodeAsciiHex($input)
*
* @return string
*/
public function decodeAscii85($input)
private function decodeAscii85($input)
{
$output = "";

$isComment = false;
$ords = array();

for ($i = 0, $state = 0; $i < strlen($input) && $input[$i] != '~'; $i++)
for ($i = 0, $state = 0; $i < strlen($input) && $input[$i] !== '~'; $i++)
{
$c = $input[$i];

if ($isComment)
{
if ($c == '\r' || $c == '\n')
if ($c === '\r' || $c === '\n')
{
$isComment = false;
}
continue;
}

if ($c == '\0' || $c == '\t' || $c == '\r' || $c == '\f' || $c == '\n' || $c == ' ')
if ($c === '\0' || $c === '\t' || $c === '\r' || $c === '\f' || $c === '\n' || $c === ' ')
{
continue;
}
if ($c == '%')
if ($c === '%')
{
$isComment = true;
continue;
}
if ($c == 'z' && $state === 0)
if ($c === 'z' && $state === 0)
{
$output .= str_repeat(chr(0), 4);
continue;
Expand Down Expand Up @@ -341,7 +387,7 @@ public function decodeAscii85($input)
*
* @return string
*/
public function decodeFlate($data)
private function decodeFlate($data)
{
return @gzuncompress($data);
}
Expand All @@ -353,7 +399,7 @@ public function decodeFlate($data)
*
* @return array
*/
public function getObjectOptions($object)
private function getObjectOptions($object)
{
$options = array();

Expand Down Expand Up @@ -396,7 +442,7 @@ public function getObjectOptions($object)
*
* @return string
*/
public function getDecodedStream($stream, $options)
private function getDecodedStream($stream, $options)
{
$data = "";

Expand All @@ -411,19 +457,19 @@ public function getDecodedStream($stream, $options)

foreach ($options as $key => $value)
{
if ($key == "ASCIIHexDecode")
if ($key === "ASCIIHexDecode")
{
$_stream = $this->decodeAsciiHex($_stream);
}
elseif ($key == "ASCII85Decode")
elseif ($key === "ASCII85Decode")
{
$_stream = $this->decodeAscii85($_stream);
}
elseif ($key == "FlateDecode")
elseif ($key === "FlateDecode")
{
$_stream = $this->decodeFlate($_stream);
}
elseif ($key == "Crypt")
elseif ($key === "Crypt")
{ // TO DO
}
}
Expand All @@ -441,7 +487,7 @@ public function getDecodedStream($stream, $options)
*
* @return void
*/
public function getDirtyTexts(&$texts, $textContainers)
private function getDirtyTexts(&$texts, $textContainers)
{
for ($j = 0; $j < count($textContainers); $j++)
{
Expand All @@ -468,7 +514,7 @@ public function getDirtyTexts(&$texts, $textContainers)
*
* @return void
*/
public function getCharTransformations(&$transformations, $stream)
private function getCharTransformations(&$transformations, $stream)
{
preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);
Expand Down Expand Up @@ -527,7 +573,7 @@ public function getCharTransformations(&$transformations, $stream)
*
* @return string
*/
public function getTextUsingTransformations($texts, $transformations)
private function getTextUsingTransformations($texts, $transformations)
{
$document = "";

Expand Down Expand Up @@ -578,23 +624,23 @@ public function getTextUsingTransformations($texts, $transformations)
{
$plain .= $c2;
}
elseif ($c2 == "n")
elseif ($c2 === "n")
{
$plain .= '\n';
}
elseif ($c2 == "r")
elseif ($c2 === "r")
{
$plain .= '\r';
}
elseif ($c2 == "t")
elseif ($c2 === "t")
{
$plain .= '\t';
}
elseif ($c2 == "b")
elseif ($c2 === "b")
{
$plain .= '\b';
}
elseif ($c2 == "f")
elseif ($c2 === "f")
{
$plain .= '\f';
}
Expand Down Expand Up @@ -625,32 +671,4 @@ public function getTextUsingTransformations($texts, $transformations)

return $document;
}

/**
* Method to set property showprogress
*
* @param boolean $showprogress
*
* @return static Return self to support chaining.
*/
public function showProgress($showprogress)
{
$this->showprogress = $showprogress;

return $this;
}

/**
* Method to set property convertquotes
*
* @param int $convertquotes
*
* @return static Return self to support chaining.
*/
public function convertQuotes($convertquotes)
{
$this->convertquotes = $convertquotes;

return $this;
}
}
Loading

0 comments on commit dc11795

Please sign in to comment.