Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@92a1ccb (#565)
Browse files Browse the repository at this point in the history
Fix for underflow issue on number of rows in the CSV Reader (duckdb/duckdb#14587)
Issue template: Add Swift redirect (duckdb/duckdb#14588)
chore: Add EOL to source files (duckdb/duckdb#14583)

Co-authored-by: krlmlr <krlmlr@users.noreply.github.com>
  • Loading branch information
github-actions[bot] and krlmlr authored Nov 2, 2024
1 parent 505175f commit 669906e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
// We check for a weird case, where we ignore an extra value, if it is a null value
return;
}
validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
}
cur_col_id++;
chunk_col_id++;
Expand Down Expand Up @@ -447,7 +447,11 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
}

//! Flushes the values parsed so far into the internal parse chunk and returns it.
//! number_of_rows is a signed counter (it can be temporarily decremented by error
//! handling); a negative value here means the error handler under-corrected, so we
//! fail loudly instead of letting the implicit signed->unsigned conversion underflow
//! into a huge cardinality.
DataChunk &StringValueResult::ToChunk() {
	if (number_of_rows < 0) {
		throw InternalException("CSVScanner: ToChunk() function. Has a negative number of rows, this indicates an "
		                        "issue with the error handler.");
	}
	// Safe: guarded above, so the cast to the unsigned idx_t cannot underflow.
	parse_chunk.SetCardinality(static_cast<idx_t>(number_of_rows));
	return parse_chunk;
}

Expand Down Expand Up @@ -658,7 +662,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
result.RemoveLastLine();
} else {
// Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables.
result.borked_rows.insert(result.number_of_rows);
result.borked_rows.insert(static_cast<idx_t>(result.number_of_rows));
result.cur_col_id = 0;
result.chunk_col_id = 0;
}
Expand Down Expand Up @@ -740,9 +744,9 @@ bool StringValueResult::AddRowInternal() {
}

if (current_errors.HandleErrors(*this)) {
line_positions_per_row[number_of_rows] = current_line_position;
line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
number_of_rows++;
if (number_of_rows >= result_size) {
if (static_cast<idx_t>(number_of_rows) >= result_size) {
// We have a full chunk
return true;
}
Expand All @@ -769,7 +773,7 @@ bool StringValueResult::AddRowInternal() {
if (empty) {
static_cast<string_t *>(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
} else {
validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
}
cur_col_id++;
chunk_col_id++;
Expand Down Expand Up @@ -799,11 +803,11 @@ bool StringValueResult::AddRowInternal() {
RemoveLastLine();
}
}
line_positions_per_row[number_of_rows] = current_line_position;
line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
cur_col_id = 0;
chunk_col_id = 0;
number_of_rows++;
if (number_of_rows >= result_size) {
if (static_cast<idx_t>(number_of_rows) >= result_size) {
// We have a full chunk
return true;
}
Expand Down Expand Up @@ -861,12 +865,12 @@ bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_
if (empty) {
static_cast<string_t *>(result.vector_ptr[0])[result.number_of_rows] = string_t();
} else {
result.validity_mask[0]->SetInvalid(result.number_of_rows);
result.validity_mask[0]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
}
result.number_of_rows++;
}
}
if (result.number_of_rows >= result.result_size) {
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
// We have a full chunk
return true;
}
Expand Down Expand Up @@ -1389,7 +1393,7 @@ void StringValueResult::SkipBOM() const {
void StringValueResult::RemoveLastLine() {
// potentially de-nullify values
for (idx_t i = 0; i < chunk_col_id; i++) {
validity_mask[i]->SetValid(number_of_rows);
validity_mask[i]->SetValid(static_cast<idx_t>(number_of_rows));
}
// reset column trackers
cur_col_id = 0;
Expand Down Expand Up @@ -1520,7 +1524,7 @@ void StringValueScanner::SetStart() {
}

void StringValueScanner::FinalizeChunkProcess() {
if (result.number_of_rows >= result.result_size || iterator.done) {
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size || iterator.done) {
// We are done
if (!sniffing) {
if (csv_file_scan) {
Expand Down Expand Up @@ -1558,14 +1562,18 @@ void StringValueScanner::FinalizeChunkProcess() {
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
has_unterminated_quotes = true;
}
result.current_errors.HandleErrors(result);
if (result.current_errors.HandleErrors(result)) {
result.number_of_rows++;
}
}
if (states.IsQuotedCurrent() && !has_unterminated_quotes) {
// If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
// quotes
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
result.last_position);
result.current_errors.HandleErrors(result);
if (result.current_errors.HandleErrors(result)) {
result.number_of_rows++;
}
}
if (!iterator.done) {
if (iterator.pos.buffer_pos >= iterator.GetEndPos() || iterator.pos.buffer_idx > iterator.GetBufferIdx() ||
Expand All @@ -1576,9 +1584,9 @@ void StringValueScanner::FinalizeChunkProcess() {
} else {
// 2) If a boundary is not set
// We read until the chunk is complete, or we have nothing else to read.
while (!FinishedFile() && result.number_of_rows < result.result_size) {
while (!FinishedFile() && static_cast<idx_t>(result.number_of_rows) < result.result_size) {
MoveToNextBuffer();
if (result.number_of_rows >= result.result_size) {
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
return;
}
if (cur_buffer_handle) {
Expand All @@ -1588,7 +1596,7 @@ void StringValueScanner::FinalizeChunkProcess() {
iterator.done = FinishedFile();
if (result.null_padding && result.number_of_rows < STANDARD_VECTOR_SIZE && result.chunk_col_id > 0) {
while (result.chunk_col_id < result.parse_chunk.ColumnCount()) {
result.validity_mask[result.chunk_col_id++]->SetInvalid(result.number_of_rows);
result.validity_mask[result.chunk_col_id++]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
result.cur_col_id++;
}
result.number_of_rows++;
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "3-dev79"
#define DUCKDB_PATCH_VERSION "3-dev88"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.3-dev79"
#define DUCKDB_VERSION "v1.1.3-dev88"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "2635a87a56"
#define DUCKDB_SOURCE_ID "92a1ccbcef"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class StringValueResult : public ScannerResult {

//! Internal Data Chunk used for flushing
DataChunk parse_chunk;
idx_t number_of_rows = 0;
int64_t number_of_rows = 0;
idx_t cur_col_id = 0;
bool figure_out_new_line = false;
//! Information to properly handle errors
Expand Down

0 comments on commit 669906e

Please sign in to comment.