Skip to content

Commit

Permalink
[XLSX] Add mergeCells reader (#273)
Browse files Browse the repository at this point in the history
Co-authored-by: Filippo Tessarotto <zoeslam@gmail.com>
  • Loading branch information
karakum and Slamdunk authored Sep 24, 2024
1 parent 2a67be1 commit 263c3fb
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ jobs:

- name: "Infection on DIFF"
if: "${{ github.event_name == 'pull_request' && matrix.code-coverage != 'none' }}"
continue-on-error: true
timeout-minutes: 10
run: "vendor/bin/infection --ansi --threads=$(nproc) --skip-initial-tests --coverage=coverage --git-diff-lines --git-diff-base=origin/${{ github.base_ref }} --show-mutations --verbose --ignore-msi-with-no-mutations --min-msi=100"
env:
INFECTION_BADGE_API_KEY: "${{ secrets.INFECTION_BADGE_API_KEY }}"

- name: "Infection on complete code base"
if: "${{ github.event_name != 'pull_request' && matrix.code-coverage != 'none' }}"
continue-on-error: true
timeout-minutes: 10
run: "vendor/bin/infection --ansi --threads=$(nproc) --skip-initial-tests --coverage=coverage"
env:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
/.phpunit.result.cache
/composer.lock
/infections.log
/.idea
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"phpstan/phpstan": "^1.12.4",
"phpstan/phpstan-phpunit": "^1.4.0",
"phpstan/phpstan-strict-rules": "^1.6.1",
"phpunit/phpunit": "^10.5.20 || ^11.3.0"
"phpunit/phpunit": "^10.5.20 || ^11.3.6"
},
"suggest": {
"ext-iconv": "To handle non UTF-8 CSV files (if \"php-mbstring\" is not already installed or is too limited)",
Expand Down
18 changes: 18 additions & 0 deletions src/Reader/SheetWithMergeCellsInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

declare(strict_types=1);

namespace OpenSpout\Reader;

/**
* @template T of RowIteratorInterface
*
* @extends SheetInterface<T>
*/
interface SheetWithMergeCellsInterface extends SheetInterface
{
/**
* @return list<string> Merge cells list ["C7:E7", "A9:D10"]
*/
public function getMergeCells(): array;
}
18 changes: 14 additions & 4 deletions src/Reader/XLSX/Manager/SheetManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use OpenSpout\Reader\XLSX\RowIterator;
use OpenSpout\Reader\XLSX\Sheet;
use OpenSpout\Reader\XLSX\SheetHeaderReader;
use OpenSpout\Reader\XLSX\SheetMergeCellsReader;

/**
* @internal
Expand Down Expand Up @@ -186,13 +187,24 @@ private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $

$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);

$mergeCells = [];
if ($this->options->SHOULD_LOAD_MERGE_CELLS) {
$mergeCells = (new SheetMergeCellsReader(
$this->filePath,
$sheetDataXMLFilePath,
$xmlReader = new XMLReader(),
new XMLProcessor($xmlReader)
))->getMergeCells();
}

return new Sheet(
$this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
$this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
$sheetIndexZeroBased,
$sheetName,
$isSheetActive,
$isSheetVisible
$isSheetVisible,
$mergeCells
);
}

Expand Down Expand Up @@ -240,8 +252,6 @@ private function createRowIterator(
Options $options,
SharedStringsManager $sharedStringsManager
): RowIterator {
$xmlReader = new XMLReader();

$workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
$styleManager = new StyleManager(
$filePath,
Expand All @@ -262,7 +272,7 @@ private function createRowIterator(
$filePath,
$sheetDataXMLFilePath,
$options->SHOULD_PRESERVE_EMPTY_ROWS,
$xmlReader,
$xmlReader = new XMLReader(),
new XMLProcessor($xmlReader),
$cellValueFormatter,
new RowManager()
Expand Down
1 change: 1 addition & 0 deletions src/Reader/XLSX/Options.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ final class Options
public bool $SHOULD_FORMAT_DATES = false;
public bool $SHOULD_PRESERVE_EMPTY_ROWS = false;
public bool $SHOULD_USE_1904_DATES = false;
public bool $SHOULD_LOAD_MERGE_CELLS = false;
}
38 changes: 30 additions & 8 deletions src/Reader/XLSX/Sheet.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
namespace OpenSpout\Reader\XLSX;

use OpenSpout\Reader\Common\ColumnWidth;
use OpenSpout\Reader\SheetWithMergeCellsInterface;
use OpenSpout\Reader\SheetWithVisibilityInterface;

/**
* @implements SheetWithVisibilityInterface<RowIterator>
* @implements SheetWithMergeCellsInterface<RowIterator>
*/
final class Sheet implements SheetWithVisibilityInterface
final class Sheet implements SheetWithVisibilityInterface, SheetWithMergeCellsInterface
{
/** @var RowIterator To iterate over sheet's rows */
private readonly RowIterator $rowIterator;
Expand All @@ -30,21 +32,33 @@ final class Sheet implements SheetWithVisibilityInterface
/** @var bool Whether the sheet is visible */
private readonly bool $isVisible;

/** @var string[] Merge cells list ["C7:E7", "A9:D10"] */
private readonly array $mergeCells;

/**
* @param RowIterator $rowIterator The corresponding row iterator
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
* @param bool $isSheetActive Whether the sheet was defined as active
* @param bool $isSheetVisible Whether the sheet is visible
* @param RowIterator $rowIterator The corresponding row iterator
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
* @param bool $isSheetActive Whether the sheet was defined as active
* @param bool $isSheetVisible Whether the sheet is visible
* @param list<string> $mergeCells Merge cells list ["C7:E7", "A9:D10"]
*/
public function __construct(RowIterator $rowIterator, SheetHeaderReader $headerReader, int $sheetIndex, string $sheetName, bool $isSheetActive, bool $isSheetVisible)
{
public function __construct(
RowIterator $rowIterator,
SheetHeaderReader $headerReader,
int $sheetIndex,
string $sheetName,
bool $isSheetActive,
bool $isSheetVisible,
array $mergeCells
) {
$this->rowIterator = $rowIterator;
$this->headerReader = $headerReader;
$this->index = $sheetIndex;
$this->name = $sheetName;
$this->isActive = $isSheetActive;
$this->isVisible = $isSheetVisible;
$this->mergeCells = $mergeCells;
}

public function getRowIterator(): RowIterator
Expand Down Expand Up @@ -91,4 +105,12 @@ public function isVisible(): bool
{
return $this->isVisible;
}

/**
* @return list<string> Merge cells list ["C7:E7", "A9:D10"]
*/
public function getMergeCells(): array
{
return $this->mergeCells;
}
}
69 changes: 69 additions & 0 deletions src/Reader/XLSX/SheetMergeCellsReader.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php

declare(strict_types=1);

namespace OpenSpout\Reader\XLSX;

use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Wrapper\XMLReader;

use function ltrim;

/**
* @internal
*/
final class SheetMergeCellsReader
{
public const XML_NODE_MERGE_CELL = 'mergeCell';
public const XML_ATTRIBUTE_REF = 'ref';

/** @var list<string> Merged cells list */
private array $mergeCells = [];

/**
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param XMLProcessor $xmlProcessor Helper to process XML files
*/
public function __construct(
string $filePath,
string $sheetDataXMLFilePath,
XMLReader $xmlReader,
XMLProcessor $xmlProcessor
) {
$sheetDataXMLFilePath = ltrim($sheetDataXMLFilePath, '/');

// Register all callbacks to process different nodes when reading the XML file
$xmlProcessor->registerCallback(self::XML_NODE_MERGE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processMergeCellsStartingNode']);
$xmlReader->close();

if (false === $xmlReader->openFileInZip($filePath, $sheetDataXMLFilePath)) {
throw new IOException("Could not open \"{$sheetDataXMLFilePath}\".");
}

// Now read the entire header of the sheet, until we reach the <sheetData> element
$xmlProcessor->readUntilStopped();
$xmlReader->close();
}

/**
* @return list<string>
*/
public function getMergeCells(): array
{
return $this->mergeCells;
}

/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<mergeCells>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processMergeCellsStartingNode(XMLReader $xmlReader): int
{
$this->mergeCells[] = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

return XMLProcessor::PROCESSING_CONTINUE;
}
}
11 changes: 11 additions & 0 deletions tests/Reader/XLSX/ReaderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,17 @@ public function testReadShouldThrowException(string $filePath): void
@$this->getAllRowsForFile($filePath);
}

#[DataProvider('dataProviderForTestReadShouldThrowException')]
public function testReadShouldThrowExceptionWithMergeCells(string $filePath): void
{
$options = new Options();
$options->SHOULD_LOAD_MERGE_CELLS = true;

$this->expectException(IOException::class);

$this->getAllRowsForFile($filePath, $options);
}

public static function dataProviderForTestReadForAllWorksheets(): array
{
return [
Expand Down
31 changes: 30 additions & 1 deletion tests/Reader/XLSX/SheetTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,43 @@ public function testReaderSheetIteratorKeyMethodShouldReturnFirstKey(): void
$reader->close();
}

public function testReaderShouldReturnEmptySheetMergedCellsByDefault(): void
{
$sheets = $this->openFileAndReturnSheets('two_sheets_with_merged_cells.xlsx');

self::assertEmpty($sheets[0]->getMergeCells());
self::assertEmpty($sheets[1]->getMergeCells());
}

public function testReaderShouldReturnCorrectSheetMergedCells(): void
{
$sheets = $this->openFileAndReturnSheets('two_sheets_with_merged_cells.xlsx', true);
$mergedCellsExpected = [
['A1:B1', 'A2:A3', 'C3:E5', 'B2:E2'],
['A1:A4', 'A5:D5', 'E2:E5', 'C1:E1', 'B1:B3', 'B4:C4', 'D3:D4', 'C2:D2'],
];
$mergedCellsActual = [
$sheets[0]->getMergeCells(),
$sheets[1]->getMergeCells(),
];

self::assertSameSize($mergedCellsExpected[0], $mergedCellsActual[0]);
self::assertEmpty(array_diff($mergedCellsActual[0], $mergedCellsExpected[0]), 'There should be no difference between merged cells on first sheet');
self::assertEmpty(array_diff($mergedCellsExpected[0], $mergedCellsActual[0]), 'There should be no difference between merged cells on first sheet');
self::assertSameSize($mergedCellsExpected[1], $mergedCellsActual[1]);
self::assertEmpty(array_diff($mergedCellsActual[1], $mergedCellsExpected[1]), 'There should be no difference between merged cells on second sheet');
self::assertEmpty(array_diff($mergedCellsExpected[1], $mergedCellsActual[1]), 'There should be no difference between merged cells on second sheet');
}

/**
* @return Sheet[]
*/
private function openFileAndReturnSheets(string $fileName): array
private function openFileAndReturnSheets(string $fileName, bool $withMergedCells = false): array
{
$resourcePath = TestUsingResource::getResourcePath($fileName);
$options = new Options();
$options->setTempFolder((new TestUsingResource())->getTempFolderPath());
$options->SHOULD_LOAD_MERGE_CELLS = $withMergedCells;
$reader = new Reader($options);
$reader->open($resourcePath);

Expand Down
Binary file not shown.

0 comments on commit 263c3fb

Please sign in to comment.