Skip to content

Commit

Permalink
Fix constant compression in-place check for bools (kuzudb#3211)
Browse files Browse the repository at this point in the history
Bools are stored packed in column chunks, so indexing into the data using the size in bytes of the type will not work.
  • Loading branch information
benjaminwinger authored Apr 5, 2024
1 parent ec6e309 commit 2100fa3
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 69 deletions.
8 changes: 0 additions & 8 deletions src/include/storage/store/null_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,12 @@ class NullColumn final : public Column {
void write(common::node_group_idx_t nodeGroupIdx, common::offset_t offsetInChunk,
ColumnChunk* data, common::offset_t dataOffset, common::length_t numValues) override;

bool canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx,
const ChunkCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo,
const ChunkCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) override;
bool canCommitInPlace(transaction::Transaction* transaction,
common::node_group_idx_t nodeGroupIdx, const std::vector<common::offset_t>& dstOffsets,
ColumnChunk* chunk, common::offset_t srcOffset) override;
void commitLocalChunkInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx,
const ChunkCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo,
const ChunkCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo,
const offset_set_t& deleteInfo) override;

private:
bool checkUpdateInPlace(const ColumnChunkMetadata& metadata, const ChunkCollection& localChunk,
const offset_to_row_idx_t& writeInfo);
std::unique_ptr<ColumnChunk> getEmptyChunkForCommit(uint64_t capacity) override {
return ColumnChunkFactory::createNullColumnChunk(enableCompression, capacity);
}
Expand Down
12 changes: 10 additions & 2 deletions src/storage/compression/compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,16 @@ bool CompressionMetadata::canUpdateInPlace(
switch (compression) {
case CompressionType::CONSTANT: {
// Value can be updated in place only if it is identical to the value already stored.
auto size = getDataTypeSizeInChunk(physicalType);
return memcmp(data + pos * size, this->data.data(), size) == 0;
switch (physicalType) {
case PhysicalTypeID::BOOL: {
return NullMask::isNull(reinterpret_cast<const uint64_t*>(data), pos) ==
*reinterpret_cast<const bool*>(this->data.data());
} break;
default: {
auto size = getDataTypeSizeInChunk(physicalType);
return memcmp(data + pos * size, this->data.data(), size) == 0;
}
}
}
case CompressionType::BOOLEAN_BITPACKING:
case CompressionType::UNCOMPRESSED: {
Expand Down
3 changes: 2 additions & 1 deletion src/storage/store/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,8 @@ bool Column::checkUpdateInPlace(const ColumnChunkMetadata& metadata,
for (auto rowIdx : rowIdxesToRead) {
auto [chunkIdx, offsetInLocalChunk] =
LocalChunkedGroupCollection::getChunkIdxAndOffsetInChunk(rowIdx);
if (localChunks[chunkIdx]->getNullChunk()->isNull(offsetInLocalChunk)) {
if (localChunks[chunkIdx]->getNullChunk() != nullptr &&
localChunks[chunkIdx]->getNullChunk()->isNull(offsetInLocalChunk)) {
continue;
}
if (!metadata.compMeta.canUpdateInPlace(
Expand Down
58 changes: 0 additions & 58 deletions src/storage/store/null_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,64 +163,6 @@ void NullColumn::write(node_group_idx_t nodeGroupIdx, offset_t offsetInChunk, Co
}
}

bool NullColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx,
const ChunkCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo,
const ChunkCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) {
auto metadata = getMetadata(nodeGroupIdx, transaction->getType());
if (metadata.compMeta.canAlwaysUpdateInPlace()) {
return true;
}
return checkUpdateInPlace(metadata, localInsertChunk, insertInfo) &&
checkUpdateInPlace(metadata, localUpdateChunk, updateInfo);
}

bool NullColumn::checkUpdateInPlace(const ColumnChunkMetadata& metadata,
const ChunkCollection& localChunks, const offset_to_row_idx_t& writeInfo) {
std::vector<row_idx_t> rowIdxesToRead;
for (auto& [_, rowIdx] : writeInfo) {
rowIdxesToRead.push_back(rowIdx);
}
std::sort(rowIdxesToRead.begin(), rowIdxesToRead.end());
for (auto rowIdx : rowIdxesToRead) {
auto [chunkIdx, offsetInLocalChunk] =
LocalChunkedGroupCollection::getChunkIdxAndOffsetInChunk(rowIdx);
auto localNullChunk =
ku_dynamic_cast<ColumnChunk*, NullColumnChunk*>(localChunks[chunkIdx]);
bool value = localNullChunk->isNull(offsetInLocalChunk);
if (!metadata.compMeta.canUpdateInPlace(
reinterpret_cast<const uint8_t*>(&value), 0, dataType.getPhysicalType())) {
return false;
}
}
return true;
}

bool NullColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx,
const std::vector<offset_t>& dstOffsets, ColumnChunk* chunk, offset_t srcOffset) {
KU_ASSERT(chunk->getNullChunk() == nullptr &&
chunk->getDataType().getPhysicalType() == PhysicalTypeID::BOOL);
auto metadata = getMetadata(nodeGroupIdx, transaction->getType());
auto maxDstOffset = getMaxOffset(dstOffsets);
if ((metadata.compMeta.numValues(BufferPoolConstants::PAGE_4KB_SIZE, dataType) *
metadata.numPages) < maxDstOffset) {
// Note that for constant compression, `metadata.numPages` will be equal to 0. Thus, this
// function will always return false.
return false;
}
if (metadata.compMeta.canAlwaysUpdateInPlace()) {
return true;
}
auto nullChunk = ku_dynamic_cast<ColumnChunk*, NullColumnChunk*>(chunk);
for (auto i = 0u; i < dstOffsets.size(); i++) {
bool value = nullChunk->isNull(srcOffset + i);
if (!metadata.compMeta.canUpdateInPlace(
reinterpret_cast<const uint8_t*>(&value), 0, dataType.getPhysicalType())) {
return false;
}
}
return true;
}

void NullColumn::commitLocalChunkInPlace(Transaction* /*transaction*/,
node_group_idx_t nodeGroupIdx, const ChunkCollection& localInsertChunks,
const offset_to_row_idx_t& insertInfo, const ChunkCollection& localUpdateChunks,
Expand Down

0 comments on commit 2100fa3

Please sign in to comment.