Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql, opt: avoid full scans in mutation queries with cost flags #137984

Merged
merged 4 commits into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,7 @@ unreserved_keyword ::=
| 'ATTRIBUTE'
| 'AUTOMATIC'
| 'AVAILABILITY'
| 'AVOID_FULL_SCAN'
| 'BACKUP'
| 'BACKUPS'
| 'BACKWARD'
Expand Down Expand Up @@ -3753,6 +3754,7 @@ bare_label_keywords ::=
| 'AUTHORIZATION'
| 'AUTOMATIC'
| 'AVAILABILITY'
| 'AVOID_FULL_SCAN'
| 'BACKUP'
| 'BACKUPS'
| 'BACKWARD'
Expand Down Expand Up @@ -4345,6 +4347,7 @@ index_flags_param ::=
| 'NO_INDEX_JOIN'
| 'NO_ZIGZAG_JOIN'
| 'NO_FULL_SCAN'
| 'AVOID_FULL_SCAN'
| 'FORCE_ZIGZAG'
| 'FORCE_ZIGZAG' '=' index_name

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ DELETE FROM messages_rbr WHERE account_id NOT IN (SELECT account_id FROM message

# Delete accessing all regions should fail.
retry
statement error pq: Query has no home region\. Try adding a filter on messages_rbr\.crdb_region and/or on key column \(messages_rbr\.account_id\)\. For more information, see https://www.cockroachlabs.com/docs/stable/cost-based-optimizer.html#control-whether-queries-are-limited-to-a-single-region
statement error pq: Query has no home region\. Try adding a filter on messages_rbr\.crdb_region and/or on key column \(messages_rbr\.message\)\. For more information, see https://www.cockroachlabs.com/docs/stable/cost-based-optimizer.html#control-whether-queries-are-limited-to-a-single-region
DELETE FROM messages_rbr WHERE message = 'Hello World!'

# Insert should fail accessing all rows in messages_rbr.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,13 @@ update t
├── unique w/tombstone indexes: t_c_key
└── project
├── scan t
│ └── constraint: /12/11
│ ├── [/'one'/2 - /'one'/2]
│ ├── [/'two'/2 - /'two'/2]
│ ├── [/'three'/2 - /'three'/2]
│ ├── [/'four'/2 - /'four'/2]
│ └── [/'five'/2 - /'five'/2]
│ ├── constraint: /12/11
│ │ ├── [/'one'/2 - /'one'/2]
│ │ ├── [/'two'/2 - /'two'/2]
│ │ ├── [/'three'/2 - /'three'/2]
│ │ ├── [/'four'/2 - /'four'/2]
│ │ └── [/'five'/2 - /'five'/2]
│ └── flags: avoid-full-scan
└── projections
└── 4

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1976,16 +1976,19 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table_virt (pk, a, b) VALUES
│ ├── • values
│ │ size: 1 column, 1 row
│ │
│ └── • limit
│ │ count: 1
│ └── • index join
│ │ table: regional_by_row_table_virt@regional_by_row_table_virt_pkey
│ │
│ └── • filter
│ │ filter: ((a + b) = 2) AND ((pk != 1) OR (crdb_region != 'ap-southeast-2'))
│ └── • limit
│ │ count: 1
│ │
│ └── • scan
│ missing stats
│ table: regional_by_row_table_virt@regional_by_row_table_virt_pkey
│ spans: FULL SCAN (SOFT LIMIT)
│ └── • filter
│ │ filter: (pk != 1) OR (crdb_region != 'ap-southeast-2')
│ │
│ └── • scan
│ missing stats
│ table: regional_by_row_table_virt@regional_by_row_table_virt_v_key
│ spans: [/'ap-southeast-2'/2 - /'ap-southeast-2'/2] [/'ca-central-1'/2 - /'ca-central-1'/2] [/'us-east-1'/2 - /'us-east-1'/2]
└── • constraint-check
Expand Down Expand Up @@ -2155,16 +2158,19 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table_virt_partial (pk, a, b)
│ ├── • values
│ │ size: 1 column, 1 row
│ │
│ └── • limit
│ │ count: 1
│ └── • index join
│ │ table: regional_by_row_table_virt_partial@regional_by_row_table_virt_partial_pkey
│ │
│ └── • filter
│ │ filter: ((a + b) = 2) AND ((pk != 1) OR (crdb_region != 'ap-southeast-2'))
│ └── • limit
│ │ count: 1
│ │
│ └── • scan
│ missing stats
│ table: regional_by_row_table_virt_partial@regional_by_row_table_virt_partial_pkey
│ spans: FULL SCAN (SOFT LIMIT)
│ └── • filter
│ │ filter: (pk != 1) OR (crdb_region != 'ap-southeast-2')
│ │
│ └── • scan
│ missing stats
│ table: regional_by_row_table_virt_partial@v_v_gt_0 (partial index)
│ spans: [/'ap-southeast-2'/2 - /'ap-southeast-2'/2] [/'ca-central-1'/2 - /'ca-central-1'/2] [/'us-east-1'/2 - /'us-east-1'/2]
└── • constraint-check
Expand All @@ -2179,12 +2185,18 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table_virt_partial (pk, a, b)
│ count: 1
└── • filter
│ filter: ((a = 1) AND (b > -1)) AND ((pk != 1) OR (crdb_region != 'ap-southeast-2'))
│ filter: (a = 1) AND (b > -1)
└── • scan
missing stats
table: regional_by_row_table_virt_partial@regional_by_row_table_virt_partial_pkey
spans: FULL SCAN (SOFT LIMIT)
└── • index join
│ table: regional_by_row_table_virt_partial@regional_by_row_table_virt_partial_pkey
└── • filter
│ filter: (pk != 1) OR (crdb_region != 'ap-southeast-2')
└── • scan
missing stats
table: regional_by_row_table_virt_partial@v_a_gt_0 (partial index)
spans: FULL SCAN (SOFT LIMIT)

query T retry
SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table_virt_partial (pk, a, b) VALUES (1, 1, 1)] OFFSET 2
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3568,6 +3568,10 @@ func (m *sessionDataMutator) SetDisallowFullTableScans(val bool) {
m.data.DisallowFullTableScans = val
}

func (m *sessionDataMutator) SetAvoidFullTableScansInMutations(val bool) {
m.data.AvoidFullTableScansInMutations = val
}

func (m *sessionDataMutator) SetAlterColumnTypeGeneral(val bool) {
m.data.AlterColumnTypeGeneralEnabled = val
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/delete
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ DELETE FROM u_a AS foo USING u_b AS bar WHERE bar.a > foo.c ORDER BY bar.a DESC

# Test aliased table names, ORDER BY and LIMIT where ORDER BY references the target
# table.
query ITI
query ITI rowsort
DELETE FROM u_a AS foo USING u_b AS bar WHERE bar.a > foo.c ORDER BY foo.a DESC LIMIT 3 RETURNING foo.*;
----
7 d 35
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -3905,6 +3905,7 @@ always_distribute_full_scans off
application_name ·
authentication_method cert-password
avoid_buffering off
avoid_full_table_scans_in_mutations on
backslash_quote safe_encoding
bypass_pcr_reader_catalog_aost off
bytea_output hex
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2904,6 +2904,7 @@ application_name ·
authentication_method cert-password NULL NULL NULL string
autocommit_before_ddl off NULL NULL NULL string
avoid_buffering off NULL NULL NULL string
avoid_full_table_scans_in_mutations on NULL NULL NULL string
backslash_quote safe_encoding NULL NULL NULL string
bypass_pcr_reader_catalog_aost off NULL NULL NULL string
bytea_output hex NULL NULL NULL string
Expand Down Expand Up @@ -3108,6 +3109,7 @@ application_name ·
authentication_method cert-password NULL user NULL cert-password cert-password
autocommit_before_ddl off NULL user NULL off off
avoid_buffering off NULL user NULL off off
avoid_full_table_scans_in_mutations on NULL user NULL on on
backslash_quote safe_encoding NULL user NULL safe_encoding safe_encoding
bypass_pcr_reader_catalog_aost off NULL user NULL off off
bytea_output hex NULL user NULL hex hex
Expand Down Expand Up @@ -3305,6 +3307,7 @@ application_name NULL NULL NULL
authentication_method NULL NULL NULL NULL NULL
autocommit_before_ddl NULL NULL NULL NULL NULL
avoid_buffering NULL NULL NULL NULL NULL
avoid_full_table_scans_in_mutations NULL NULL NULL NULL NULL
backslash_quote NULL NULL NULL NULL NULL
bypass_pcr_reader_catalog_aost NULL NULL NULL NULL NULL
bytea_output NULL NULL NULL NULL NULL
Expand Down
19 changes: 19 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/select
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,25 @@ SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,NO_FULL_SCAN} WHERE b > 0
statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,NO_FULL_SCAN} WHERE a > 0 AND b = 1

# Now avoid full scans with a hint. A full scan should not cause an error.
statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN} WHERE b > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0 AND b = 1

# Now disable full scans with the session variable.
statement ok
SET disallow_full_table_scans = true;
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ application_name ·
authentication_method cert-password
autocommit_before_ddl off
avoid_buffering off
avoid_full_table_scans_in_mutations on
backslash_quote safe_encoding
bypass_pcr_reader_catalog_aost off
bytea_output hex
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/distribution/distribution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ func TestGetDistributions(t *testing.T) {
inputRel := childInput
distributeExpr := &memo.DistributeExpr{Input: childInput}
distributeRel = distributeExpr
f.Memo().SetBestProps(distributeRel, &physical.Required{}, parentProvided, 0)
f.Memo().SetBestProps(inputRel, &physical.Required{}, childProvided, 0)
f.Memo().SetBestProps(distributeRel, &physical.Required{}, parentProvided, memo.Cost{C: 0})
f.Memo().SetBestProps(inputRel, &physical.Required{}, childProvided, memo.Cost{C: 0})

targetDist, sourceDist, ok := distributeExpr.GetDistributions()
// Check if we got distributions when expected.
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
val := exec.EstimatedStats{
TableStatsAvailable: stats.Available,
RowCount: stats.RowCount,
Cost: float64(e.Cost()),
Cost: e.Cost().C,
}
if scan, ok := e.(*memo.ScanExpr); ok {
tab := b.mem.Metadata().Table(scan.Table)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/autocommit
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ WHERE message LIKE '%r$rangeid: sending batch%'
----
dist sender send r74: sending batch 1 Scan to (n1,s1):1
dist sender send r74: sending batch 1 Put to (n1,s1):1
dist sender send r74: sending batch 1 Scan to (n1,s1):1
dist sender send r74: sending batch 1 Get to (n1,s1):1
dist sender send r74: sending batch 1 EndTxn to (n1,s1):1

query B
Expand Down
67 changes: 67 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ CREATE TABLE xyz (

# Ensure that we can use a hint to avoid a full table scan.

statement ok
SET avoid_full_table_scans_in_mutations = false

# Without the hint, we plan a full table scan.
query T
EXPLAIN (VERBOSE) DELETE FROM xyz WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000) RETURNING z
Expand Down Expand Up @@ -345,6 +348,70 @@ vectorized: true
spans: /1-/1000 /2001-/3000
locking strength: for update

# AVOID_FULL_SCAN also works to ensure a constrained scan.
query T
EXPLAIN (VERBOSE) DELETE FROM xyz@{AVOID_FULL_SCAN} WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000) RETURNING z
----
distribution: local
vectorized: true
·
• project
│ columns: (z)
└── • delete
│ columns: (x, z)
│ estimated row count: 990 (missing stats)
│ from: xyz
│ auto commit
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@xyz_y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

# We also avoid the full scan using the session setting
# avoid_full_table_scans_in_mutations.
statement ok
SET avoid_full_table_scans_in_mutations = true

query T
EXPLAIN (VERBOSE) DELETE FROM xyz WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000) RETURNING z
----
distribution: local
vectorized: true
·
• project
│ columns: (z)
└── • delete
│ columns: (x, z)
│ estimated row count: 990 (missing stats)
│ from: xyz
│ auto commit
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@xyz_y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

# Testcase for issue 105803.

statement ok
Expand Down
Loading
Loading