Skip to content

Commit

Permalink
Merge pull request #10714 from pdet/header_default_true
Browse files Browse the repository at this point in the history
[CSV Sniffer] Tweaking header detection
  • Loading branch information
Mytherin authored Feb 27, 2024
2 parents 3345f75 + 73473b5 commit 3f4f1d9
Show file tree
Hide file tree
Showing 88 changed files with 397 additions and 277 deletions.
2 changes: 2 additions & 0 deletions data/csv/headers/all_varchar.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name
Pedro
3 changes: 3 additions & 0 deletions data/csv/headers/borked_type.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date
02/01/2019
08//01/2019
1 change: 1 addition & 0 deletions data/csv/headers/integer.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
32
1 change: 1 addition & 0 deletions data/csv/headers/single_line.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
creationDate, Id
2 changes: 2 additions & 0 deletions data/csv/headers/undetected_type.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Value
68,527.00
85 changes: 69 additions & 16 deletions src/execution/operator/csv_scanner/sniffer/header_detection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,51 @@ static string NormalizeColumnName(const string &col_name) {
}
return col_name_cleaned;
}

// If our columns were set by the user, we verify if their names match with the first row
bool CSVSniffer::DetectHeaderWithSetColumn() {
bool has_header = true;
bool all_varchar = true;
bool first_row_consistent = true;
// User set the names, we must check if they match the first row
// We do a +1 to check for situations where the csv file has an extra all null column
if (set_columns.Size() != best_header_row.size() && set_columns.Size() + 1 != best_header_row.size()) {
return false;
} else {
// Let's do a match-aroo
for (idx_t i = 0; i < set_columns.Size(); i++) {
if (best_header_row[i].IsNull()) {
return false;
}
if (best_header_row[i] != (*set_columns.names)[i]) {
has_header = false;
break;
}
}
}
if (!has_header) {
// We verify if the types are consistent
for (idx_t col = 0; col < set_columns.Size(); col++) {
auto dummy_val = best_header_row[col];
// try cast to sql_type of column
const auto &sql_type = (*set_columns.types)[col];
if (sql_type != LogicalType::VARCHAR) {
all_varchar = false;
if (!TryCastValue(options.dialect_options, options.decimal_separator, dummy_val, sql_type)) {
first_row_consistent = false;
}
}
}
if (all_varchar) {
// Can't be the header
return false;
}
return !first_row_consistent;
}
return has_header;
}
void CSVSniffer::DetectHeader() {
auto &sniffer_state_machine = best_candidate->GetStateMachine();

if (best_header_row.empty()) {
sniffer_state_machine.dialect_options.header = false;
for (idx_t col = 0; col < sniffer_state_machine.dialect_options.num_cols; col++) {
Expand All @@ -119,25 +161,36 @@ void CSVSniffer::DetectHeader() {
error_handler->Error(error);
}
bool all_varchar = true;
for (idx_t col = 0; col < best_header_row.size(); col++) {
auto dummy_val = best_header_row[col];
if (!dummy_val.IsNull()) {
first_row_nulls = false;
}
bool has_header;

// try cast to sql_type of column
const auto &sql_type = best_sql_types_candidates_per_column_idx[col].back();
if (sql_type != LogicalType::VARCHAR) {
all_varchar = false;
if (!TryCastValue(sniffer_state_machine, dummy_val, sql_type)) {
first_row_consistent = false;
if (set_columns.IsSet()) {
has_header = DetectHeaderWithSetColumn();
} else {
for (idx_t col = 0; col < best_header_row.size(); col++) {
auto dummy_val = best_header_row[col];
if (!dummy_val.IsNull()) {
first_row_nulls = false;
}
// try cast to sql_type of column
const auto &sql_type = best_sql_types_candidates_per_column_idx[col].back();
if (sql_type != LogicalType::VARCHAR) {
all_varchar = false;
if (!TryCastValue(sniffer_state_machine.dialect_options,
sniffer_state_machine.options.decimal_separator, dummy_val, sql_type)) {
first_row_consistent = false;
}
}
}
// Our header is only false if types are not all varchar, and rows are consistent
if (all_varchar || first_row_nulls) {
has_header = true;
} else {
has_header = !first_row_consistent;
}
}
bool has_header;
if (!sniffer_state_machine.options.dialect_options.header.IsSetByUser()) {
has_header = (!first_row_consistent || first_row_nulls) && !all_varchar;
} else {

if (sniffer_state_machine.options.dialect_options.header.IsSetByUser()) {
// Header is defined by user, use that.
has_header = sniffer_state_machine.options.dialect_options.header.GetValue();
}
// update parser info, and read, generate & set col_names based on previous findings
Expand Down
16 changes: 9 additions & 7 deletions src/execution/operator/csv_scanner/sniffer/type_detection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,27 +84,28 @@ string GenerateDateFormat(const string &separator, const char *format_template)
return result;
}

bool CSVSniffer::TryCastValue(CSVStateMachine &candidate, const Value &value, const LogicalType &sql_type) {
bool CSVSniffer::TryCastValue(const DialectOptions &dialect_options, const string &decimal_separator,
const Value &value, const LogicalType &sql_type) {
if (value.IsNull()) {
return true;
}
if (!candidate.dialect_options.date_format.find(LogicalTypeId::DATE)->second.GetValue().Empty() &&
if (!dialect_options.date_format.find(LogicalTypeId::DATE)->second.GetValue().Empty() &&
sql_type.id() == LogicalTypeId::DATE) {
date_t result;
string error_message;
return candidate.dialect_options.date_format.find(LogicalTypeId::DATE)
return dialect_options.date_format.find(LogicalTypeId::DATE)
->second.GetValue()
.TryParseDate(string_t(StringValue::Get(value)), result, error_message);
}
if (!candidate.dialect_options.date_format.find(LogicalTypeId::TIMESTAMP)->second.GetValue().Empty() &&
if (!dialect_options.date_format.find(LogicalTypeId::TIMESTAMP)->second.GetValue().Empty() &&
sql_type.id() == LogicalTypeId::TIMESTAMP) {
timestamp_t result;
string error_message;
return candidate.dialect_options.date_format.find(LogicalTypeId::TIMESTAMP)
return dialect_options.date_format.find(LogicalTypeId::TIMESTAMP)
->second.GetValue()
.TryParseTimestamp(string_t(StringValue::Get(value)), result, error_message);
}
if (candidate.options.decimal_separator != "." && (sql_type.id() == LogicalTypeId::DOUBLE)) {
if (decimal_separator != "." && (sql_type.id() == LogicalTypeId::DOUBLE)) {
return TryCastFloatingOperator::Operation<TryCastErrorMessageCommaSeparated, double>(StringValue::Get(value));
}
Value new_value;
Expand Down Expand Up @@ -240,7 +241,8 @@ void CSVSniffer::DetectTypes() {
// Nothing to convert it to
continue;
}
if (TryCastValue(sniffing_state_machine, dummy_val, sql_type)) {
if (TryCastValue(sniffing_state_machine.dialect_options,
sniffing_state_machine.options.decimal_separator, dummy_val, sql_type)) {
break;
} else {
if (row_idx != start_idx_detection && cur_top_candidate == LogicalType::BOOLEAN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons
names = bind_data.column_info[file_idx].names;
types = bind_data.column_info[file_idx].types;
options.dialect_options.num_cols = names.size();
if (options.auto_detect) {
CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
sniffer.SniffCSV();
}
state_machine = make_shared<CSVStateMachine>(
state_machine_cache.Get(options.dialect_options.state_machine_options), options);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ class CSVSniffer {
void DetectTypes();
//! Change the date format for the type to the string
//! Try to cast a string value to the specified sql type
bool TryCastValue(CSVStateMachine &candidate, const Value &value, const LogicalType &sql_type);
bool TryCastValue(const DialectOptions &dialect_options, const string &decimal_separator, const Value &value,
const LogicalType &sql_type);
void SetDateFormat(CSVStateMachine &candidate, const string &format_specifier, const LogicalTypeId &sql_type);

//! Function that initialized the necessary variables used for date and timestamp detection
Expand Down Expand Up @@ -178,6 +179,7 @@ class CSVSniffer {
//! ------------------ Header Detection ----------------- //
//! ------------------------------------------------------//
void DetectHeader();
bool DetectHeaderWithSetColumn();
vector<string> names;

//! ------------------------------------------------------//
Expand Down
6 changes: 3 additions & 3 deletions test/optimizer/csv_pushdown.test
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Column at position: 13 Set type: DATE Sniffed type: VARCHAR

# conversion is skipped if we don't read the value - so even with the incorrect type specified this still works
query I
SELECT l_returnflag FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
SELECT l_returnflag FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, header = 0);
----
N
N
Expand All @@ -56,7 +56,7 @@ SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/d
l_extendedprice

query III
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'SMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true);
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'SMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true, header = 0);
----
1 15519 24387
1 6370 10211
Expand All @@ -69,7 +69,7 @@ SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/d
l_extendedprice

query III
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'USMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true);
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('test/sql/copy/csv/data/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'USMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true, header = 0);
----
1 15519 24387
1 6731 58958
Expand Down
4 changes: 2 additions & 2 deletions test/parallel_csv/test_parallel_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ TEST_CASE("Test Parallel CSV All Files - test/sql/copy/csv/data", "[parallel-csv

//! Test case with specific parameters that allow us to run the no_quote.tsv we were skipping
TEST_CASE("Test Parallel CSV All Files - test/sql/copy/csv/data/no_quote.csv", "[parallel-csv][.]") {
string add_parameters = ", header=1, quote=''";
string add_parameters = ", quote=''";
string file = "test/sql/copy/csv/data/no_quote.csv";
REQUIRE(RunFull(file, nullptr, add_parameters));
}
Expand Down Expand Up @@ -242,7 +242,7 @@ TEST_CASE("Test Parallel CSV All Files - data/csv", "[parallel-csv][.]") {

//! Test case with specific parameters that allow us to run the bug_7578.csv we were skipping
TEST_CASE("Test Parallel CSV All Files - data/csv/bug_7578.csv", "[parallel-csv][.]") {
string add_parameters = ", delim=\'\\t\', header=true, quote = \'`\', columns={ \'transaction_id\': \'VARCHAR\', "
string add_parameters = ", delim=\'\\t\', quote = \'`\', columns={ \'transaction_id\': \'VARCHAR\', "
"\'team_id\': \'INT\', \'direction\': \'INT\', \'amount\':\'DOUBLE\', "
"\'account_id\':\'INT\', \'transaction_date\':\'DATE\', \'recorded_date\':\'DATE\', "
"\'tags.transaction_id\':\'VARCHAR\', \'tags.team_id\':\'INT\', \'tags\':\'varchar\'}";
Expand Down
2 changes: 1 addition & 1 deletion test/sql/cast/string_to_list_cast.test
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ statement ok
CREATE TABLE int_list(col INT[]);

statement ok
COPY (SELECT [1,2,3]) TO '__TEST_DIR__/int_list.csv' (Header 0);
COPY (SELECT [1,2,3]) TO '__TEST_DIR__/int_list.csv';

statement ok
COPY int_list FROM '__TEST_DIR__/int_list.csv';
Expand Down
2 changes: 1 addition & 1 deletion test/sql/cast/string_to_map_cast.test
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ statement ok
CREATE TABLE assembled_maps(col1 MAP(INT, VARCHAR));

statement ok
COPY (SELECT '{8="hello, DuckDB"}') TO '__TEST_DIR__/assembled_maps.csv' (Header 0);
COPY (SELECT '{8="hello, DuckDB"}') TO '__TEST_DIR__/assembled_maps.csv';

statement ok
COPY assembled_maps FROM '__TEST_DIR__/assembled_maps.csv';
Expand Down
2 changes: 1 addition & 1 deletion test/sql/cast/string_to_struct_cast.test
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ statement ok
CREATE TABLE assorted_structs(col1 STRUCT(a INT, b VARCHAR));

statement ok
COPY (SELECT '{a: 8, b: "hello, DuckDB"}') TO '__TEST_DIR__/assorted_structs.csv' (Header 0);
COPY (SELECT '{a: 8, b: "hello, DuckDB"}') TO '__TEST_DIR__/assorted_structs.csv';

statement ok
COPY assorted_structs FROM '__TEST_DIR__/assorted_structs.csv';
Expand Down
4 changes: 2 additions & 2 deletions test/sql/copy/csv/auto/test_auto_8573.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ statement ok
PRAGMA verify_parallelism

query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv('data/csv/big_number.csv', HEADER=TRUE, COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)

query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv_auto('data/csv/big_number.csv', HEADER=TRUE, COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv_auto('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)
2 changes: 1 addition & 1 deletion test/sql/copy/csv/auto/test_auto_8649.test
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PRAGMA verify_parallelism

# Sample
query I
SELECT * FROM read_csv_auto("data/csv/dim0.csv", header = 1) ;
SELECT * FROM read_csv_auto("data/csv/dim0.csv") ;
----
T
0
4 changes: 2 additions & 2 deletions test/sql/copy/csv/auto/test_auto_ontime.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ statement ok
CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled DECIMAL(8,2), cancellationcode CHAR(1), diverted DECIMAL(8,2), crselapsedtime DECIMAL(8,2), actualelapsedtime DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup DECIMAL(8,2), carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, div2airportseqid INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));

query I
COPY ontime FROM 'test/sql/copy/csv/data/real/ontime_sample.csv' (HEADER TRUE);
COPY ontime FROM 'test/sql/copy/csv/data/real/ontime_sample.csv';
----
9

Expand All @@ -31,7 +31,7 @@ statement ok
CREATE TABLE ontime2 AS SELECT * FROM ontime LIMIT 0

statement ok
COPY ontime2 FROM 'test/sql/copy/csv/data/real/ontime_sample.csv' (HEADER TRUE);
COPY ontime2 FROM 'test/sql/copy/csv/data/real/ontime_sample.csv';

query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM ontime EXCEPT SELECT * FROM ontime2)
Expand Down
1 change: 0 additions & 1 deletion test/sql/copy/csv/auto/test_date_format_bug_linux.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
query I
SELECT * FROM read_csv_auto('test/sql/copy/csv/data/auto/date_format_bug_linux.csv')
----
test
8cb123cb8
34fd321
fg5391jn4
46 changes: 43 additions & 3 deletions test/sql/copy/csv/auto/test_header_detection.test
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ DROP TABLE test;

# CSV file with two lines both only strings
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('test/sql/copy/csv/data/auto/varchar_multi_line.csv');
CREATE TABLE test AS SELECT * FROM read_csv_auto ('test/sql/copy/csv/data/auto/varchar_multi_line.csv', header = 0);

query TTT
SELECT * FROM test ORDER BY column0;
Expand All @@ -68,7 +68,7 @@ DROP TABLE test;

# CSV file with one line, two columns, only strings
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('test/sql/copy/csv/data/auto/varchar_single_line.csv');
CREATE TABLE test AS SELECT * FROM read_csv_auto ('test/sql/copy/csv/data/auto/varchar_single_line.csv', header = 0);

query TT
SELECT column0, column1 FROM test ORDER BY column0;
Expand Down Expand Up @@ -97,7 +97,6 @@ CREATE TABLE test AS SELECT * FROM read_csv_auto ('test/sql/copy/csv/data/auto/s
query T
SELECT * FROM test;
----
Test

statement ok
DROP TABLE test;
Expand Down Expand Up @@ -132,3 +131,44 @@ SELECT id FROM test;

statement ok
DROP TABLE test;

statement ok
CREATE TABLE my_varchars(a VARCHAR, b VARCHAR, c VARCHAR);

statement ok
INSERT INTO my_varchars VALUES ('Hello', 'Beautiful', 'World');

statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_header.csv' (HEADER 1);

statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_no_header.csv' (HEADER 0);

statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_header.csv';

statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_no_header.csv';

query III
FROM my_varchars;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World

statement ok
COPY my_varchars TO '__TEST_DIR__/big_varchar.csv';

statement ok
COPY my_varchars FROM '__TEST_DIR__/big_varchar.csv';

query III
FROM my_varchars;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Loading

0 comments on commit 3f4f1d9

Please sign in to comment.