Skip to content

Commit

Permalink
Merge pull request #11057 from pdet/csv_custom_date
Browse files Browse the repository at this point in the history
[CSV Sniffer] Consider date/timestamp formats from the user when sniffing
  • Loading branch information
Mytherin authored Mar 9, 2024
2 parents dfc62fa + f39eb18 commit d030748
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 0 deletions.
3 changes: 3 additions & 0 deletions data/csv/custom_date.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ID,Date_Time
123,"1/2/2023, 12:33 PM"
124,"12/2/2023, 11:57 AM"
5 changes: 5 additions & 0 deletions src/execution/operator/csv_scanner/sniffer/type_detection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ void CSVSniffer::InitializeDateAndTimeStampDetection(CSVStateMachine &candidate,
auto &format_candidate = format_candidates[sql_type.id()];
if (!format_candidate.initialized) {
format_candidate.initialized = true;
// if user set a format, we add that as well
auto user_format = options.dialect_options.date_format.find(sql_type.id());
if (user_format->second.IsSetByUser()) {
format_candidate.format.emplace_back(user_format->second.GetValue().format_specifier);
}
// order by preference
auto entry = format_template_candidates.find(sql_type.id());
if (entry != format_template_candidates.end()) {
Expand Down
23 changes: 23 additions & 0 deletions test/sql/copy/csv/test_date.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,26 @@ SELECT cast(d as string) FROM date_test;
----
2019-06-05

query TT
Select * from read_csv('data/csv/custom_date.csv', header=true, dateformat='%m/%d/%Y, %-I:%-M %p', types = ['BIGINT', 'DATE'] );
----
123 2023-01-02
124 2023-12-02

query TT
Select * from read_csv('data/csv/custom_date.csv', header=true, dateformat='%m/%d/%Y, %-I:%-M %p');
----
123 2023-01-02
124 2023-12-02

query TT
Select * from read_csv('data/csv/custom_date.csv', header=true, timestampformat='%m/%d/%Y, %-I:%-M %p', types = ['BIGINT', 'TIMESTAMP'] );
----
123 2023-01-02 12:33:00
124 2023-12-02 11:57:00

query TT
Select * from read_csv('data/csv/custom_date.csv', header=true, timestampformat='%m/%d/%Y, %-I:%-M %p');
----
123 2023-01-02 12:33:00
124 2023-12-02 11:57:00

0 comments on commit d030748

Please sign in to comment.