Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize ecrecover ASM #840

Merged
merged 48 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
3fefe66
windowed mul
wborgeaud Nov 9, 2022
9f05bc3
Working
wborgeaud Nov 10, 2022
a040ff8
Window of 4 bits
wborgeaud Nov 10, 2022
5219bab
Fix
wborgeaud Nov 10, 2022
5d7c9f9
Comments
wborgeaud Nov 14, 2022
41555a9
Unroll loop
wborgeaud Nov 16, 2022
3461f75
Unroll loop
wborgeaud Nov 16, 2022
0840911
remove global
wborgeaud Nov 16, 2022
41bf153
Minor
wborgeaud Nov 18, 2022
a28c217
Minor
wborgeaud Nov 18, 2022
a7f7568
Implement `CALLVALUE, CALLDATALOAD, CALLDATASIZE, CALLDATACOPY` in in…
wborgeaud Nov 18, 2022
bbc13dc
Merge branch 'calldata_opcodes' into evm_secp_glv
wborgeaud Nov 18, 2022
1d87530
Minor
wborgeaud Nov 19, 2022
a96af5c
Doesn't work
wborgeaud Nov 21, 2022
4c97518
Minor
wborgeaud Dec 5, 2022
d2edaa3
Minor
wborgeaud Dec 5, 2022
be68c6b
wnaf msm
wborgeaud Dec 6, 2022
dff7593
Working hardcoded values: 28657 opcodes
wborgeaud Dec 6, 2022
99cc05a
Working wnaf
wborgeaud Dec 7, 2022
138c1df
Small wnaf optim
wborgeaud Dec 7, 2022
5a8783b
Precompute works
wborgeaud Dec 7, 2022
d39e9c9
Working together
wborgeaud Dec 7, 2022
33ede19
Bump to 129 bits
wborgeaud Dec 8, 2022
1204b4a
Working glv decomposition
wborgeaud Dec 8, 2022
2121790
Working MSM with GLV
wborgeaud Dec 8, 2022
2ddf4e3
Almost working
wborgeaud Dec 8, 2022
83157c3
Working
wborgeaud Dec 8, 2022
041c98c
ECC test folder
wborgeaud Dec 8, 2022
93285df
Working with real sig data
wborgeaud Dec 8, 2022
00da66e
Fix tests + Clippy
wborgeaud Dec 8, 2022
835ee56
Minor
wborgeaud Dec 8, 2022
0564144
Cleaning
wborgeaud Dec 9, 2022
b86001b
Comments
wborgeaud Dec 9, 2022
ebf5df0
Merge branch 'main' into evm_secp_glv
wborgeaud Dec 9, 2022
a55b9aa
Cleaning
wborgeaud Dec 9, 2022
bafe340
Smaller glv test file
wborgeaud Dec 9, 2022
269eb12
Print opcode count at the end of interpreter run
wborgeaud Dec 9, 2022
cb001d8
More constants
wborgeaud Dec 9, 2022
e9293d6
Add z3 proof that the GLV scalars are 129-bit or less
wborgeaud Dec 9, 2022
84c1683
Minor change to z3 proof
wborgeaud Dec 9, 2022
69e1290
Merge branch 'main' into evm_secp_glv
wborgeaud Dec 13, 2022
c45e9c3
Minor
wborgeaud Dec 15, 2022
d413b05
Hamish's suggestion
wborgeaud Jan 13, 2023
ccc225a
Working
wborgeaud Jan 13, 2023
c01b0e3
Cleaning
wborgeaud Jan 13, 2023
3e3e722
Clippy
wborgeaud Jan 13, 2023
5da15fa
PR feedback
wborgeaud Jan 31, 2023
0fe940f
Minor PR feedback
wborgeaud Jan 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Cleaning
  • Loading branch information
wborgeaud committed Jan 13, 2023
commit c01b0e3684b0b0805c77cd08d5a9945b21d692e5
2 changes: 0 additions & 2 deletions evm/src/cpu/kernel/aggregator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ pub(crate) fn combined_kernel() -> Kernel {
include_str!("asm/curve/secp256k1/lift_x.asm"),
include_str!("asm/curve/secp256k1/moddiv.asm"),
include_str!("asm/curve/secp256k1/glv.asm"),
include_str!("asm/curve/secp256k1/msm.asm"),
include_str!("asm/curve/secp256k1/wnaf.asm"),
include_str!("asm/curve/secp256k1/precomputation.asm"),
include_str!("asm/exp.asm"),
include_str!("asm/fields/fp6_macros.asm"),
Expand Down
45 changes: 6 additions & 39 deletions evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,12 @@ ecrecover_valid_input:
// stack: u2, u1, x, y, pubkey_to_addr, retdest
%jump(ecdsa_msm_with_glv)

// Computes `a * G + b * Q` using GLV+wNAF, where `G` is the Secp256k1 generator and `Q` is a point on the curve.
// Computes `a * G + b * Q` using GLV+precomputation, where `G` is the Secp256k1 generator and `Q` is a point on the curve.
// Pseudo-code:
// precompute_table(G) -- precomputation table for the base point, stored in memory
// precompute_table(G) -- precomputation table for the combinations of `G, phi(G), Q, phi(Q)`.
// let a0, a1 = glv_decompose(a)
// let wnaf_a0 = wnaf(a0) -- the wNAF values are stored in memory
// let wnaf_a1 = wnaf(a0)
// let b0, b1 = glv_decompose(b)
// let wnaf_b0 = wnaf(b0)
// let wnaf_b1 = wnaf(b0)
// precompute_table(Q) -- precomputation table for `Q`, stored in memory
// return msm_with_wnaf([wnaf_a0, wnaf_a1, wnaf_b0, wnaf_b1], [G, phi(G), Q, phi(Q)]) -- phi is the Secp endomorphism.
// return msm_with_precomputation([a0, a1, b0, b1], [G, phi(G), Q, phi(Q)]) -- phi is the Secp endomorphism.
ecdsa_msm_with_glv:
%stack (a, b, Qx, Qy, retdest) -> (a, ecdsa_after_glv_a, b, Qx, Qy, retdest)
%jump(glv_decompose)
Expand All @@ -87,8 +82,7 @@ ecdsa_after_glv_a:
%jump(glv_decompose)
ecdsa_after_glv_b:
%stack (b1neg, b0, b1, a1neg, a0, a1, Qx, Qy, retdest) -> (a1neg, b1neg, Qx, Qy, ecdsa_after_precompute, a0, a1, b0, b1, retdest)
global wth:
%jump(precompute_table_base_point)
%jump(precompute_table)
ecdsa_after_precompute:
// stack: a0, a1, b0, b1, retdest
PUSH 0 PUSH 0 PUSH 129
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -105,10 +99,9 @@ ecdsa_after_precompute_loop:
%mul_const(2) ADD %mul_const(2) ADD %mul_const(2) ADD
%stack (index, i, accx, accy, a0, a1, b0, b1, retdest) -> (index, index, i, accx, accy, a0, a1, b0, b1, retdest)
%mul_const(2) %add_const(1)
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
SWAP1 %mul_const(2)
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
global wtf:
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%stack (Px, Py, i, accx, accy, a0, a1, b0, b1, retdest) -> (Px, Py, accx, accy, ecdsa_after_precompute_loop_contd, i, a0, a1, b0, b1, retdest)
%jump(ec_add_valid_points_secp)
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
ecdsa_after_precompute_loop_contd:
Expand All @@ -122,32 +115,6 @@ ecdsa_after_precompute_loop_end:
%stack (accx, accy, ecdsa_after_precompute_loop_contd2, i, a0, a1, b0, b1, retdest) -> (retdest, accx, accy)
JUMP

/* ecdsa_after_a0:
// stack: a1, b, Qx, Qy, retdest
PUSH ecdsa_after_a1 SWAP1 PUSH @SEGMENT_KERNEL_WNAF_B %jump(wnaf)
ecdsa_after_a1:
// stack: b, Qx, Qy, retdest
%stack (b, Qx, Qy, retdest) -> (b, ecdsa_after_glv_b, Qx, Qy, retdest)
%jump(glv_decompose)
ecdsa_after_glv_b:
// stack: b1neg, b0, b1, Qx, Qy, retdest
// Store b1neg at this (otherwise unused) location. Will be used later in the MSM.
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_Q, 1337)
// stack: b0, b1, Qx, Qy, retdest
PUSH ecdsa_after_b0 SWAP1 PUSH @SEGMENT_KERNEL_WNAF_C %jump(wnaf)
ecdsa_after_b0:
// stack: d, Qx, Qy, retdest
PUSH ecdsa_after_b1 SWAP1 PUSH @SEGMENT_KERNEL_WNAF_D %jump(wnaf)
ecdsa_after_b1:
%stack (Qx, Qy, retdest) -> (Qx, Qy, ecdsa_after_precompute, retdest)
%jump(precompute_table)
ecdsa_after_precompute:
// stack: retdest
%jump(ecdsa_msm)
%stack (accx, accy, retdest) -> (retdest, accx, accy)
JUMP */


// Take a public key (PKx, PKy) and return the associated address KECCAK256(PKx || PKy)[-20:].
pubkey_to_addr:
// stack: PKx, PKy, retdest
Expand Down
153 changes: 0 additions & 153 deletions evm/src/cpu/kernel/asm/curve/secp256k1/msm.asm

This file was deleted.

114 changes: 36 additions & 78 deletions evm/src/cpu/kernel/asm/curve/secp256k1/precomputation.asm
Original file line number Diff line number Diff line change
@@ -1,113 +1,71 @@
// Precompute a table of multiples of the Secp256k1 point `Q = (Qx, Qy)`.
// Let `(Qxi, Qyi) = i * Q`, then store in the `SEGMENT_KERNEL_ECDSA_TABLE_Q` segment of memory the values
// `i-1 => Qxi`, `i => Qyi if i < 16 else -Qy(32-i)` for `i in range(1, 32, 2)`.
// Initial stack: Gneg, Qneg, Qx, Qy, retdest
// Compute a*G ± b*phi(G) + c*Q ± d*phi(Q) for a,b,c,d in {0,1}^4 and store it at location `8a+4b+2c+d` in the SEGMENT_KERNEL_ECDSA_TABLE segment.
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
global precompute_table:
// stack: Qx, Qy, retdest
PUSH precompute_table_contd DUP3 DUP3
%jump(ec_double_secp)
precompute_table_contd:
// stack: Qx2, Qy2, Qx, Qy, retdest
PUSH 1
global precompute_table_loop:
// stack i, Qx2, Qy2, Qx, Qy, retdest
PUSH 1 DUP2 SUB
%stack (im, i, Qx2, Qy2, Qx, Qy, retdest) -> (i, Qy, im, Qx, i, Qx2, Qy2, Qx, Qy, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_Q) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_Q)
// stack: i, Qx2, Qy2, Qx, Qy, retdest
DUP1 PUSH 32 SUB PUSH 1 DUP2 SUB
// stack: 31-i, 32-i, i, Qx2, Qy2, Qx, Qy, retdest
DUP7 PUSH @SECP_BASE SUB
// TODO: Could maybe avoid storing Qx a second time here, not sure if it would be more efficient.
%stack (Qyy, iii, ii, i, Qx2, Qy2, Qx, Qy, retdest) -> (iii, Qx, ii, Qyy, i, Qx2, Qy2, Qx, Qy, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_Q) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_Q)
// stack: i, Qx2, Qy2, Qx, Qy, retdest
PUSH 2 ADD
// stack: i+2, Qx2, Qy2, Qx, Qy, retdest
DUP1 PUSH 16 LT %jumpi(precompute_table_end)
%stack (i, Qx2, Qy2, Qx, Qy, retdest) -> (Qx, Qy, Qx2, Qy2, precompute_table_loop_contd, i, Qx2, Qy2, retdest)
%jump(ec_add_valid_points_secp)
precompute_table_loop_contd:
%stack (Qx, Qy, i, Qx2, Qy2, retdest) -> (i, Qx2, Qy2, Qx, Qy, retdest)
%jump(precompute_table_loop)

precompute_table_end:
// stack: i, Qx2, Qy2, Qx, Qy, retdest
%pop5 JUMP


// Same as if the `precompute_table` above was called on the base point, but with values hardcoded.
// TODO: Could be called only once in a tx execution after the bootstrapping phase for example.
global precompute_table_base_point:
// First store G, ± phi(G), G ± phi(G)
// stack: Gneg, Qneg, Qx, Qy, retdest

PUSH 32670510020758816978083085130507043184471273380659243275938904335757337482424 PUSH 17 PUSH 55066263022277343669578718895168534326250603453777594175500187360389116729240 PUSH 16
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
/* DUP1 DUP1 %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) SWAP1 PUSH 1 SUB %mul_const(32670510020758816978083085130507043184471273380659243275938904335757337482424) ADD
PUSH 9 PUSH 85340279321737800624759429340272274763154997815782306132637707972559913914315 PUSH 8
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
DUP1 %mul_const(100652675408719987021357910538015346127426077519185866739835120963490438734674) SWAP1 PUSH 1 SUB %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) ADD
PUSH 25 PUSH 91177636130617246552803821781935006617134368061721227770777272682868638699771 PUSH 24 */
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
DUP1 DUP1 %mul_const(32670510020758816978083085130507043184471273380659243275938904335757337482424) SWAP1 PUSH 1 SUB %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) ADD
PUSH 9 PUSH 85340279321737800624759429340272274763154997815782306132637707972559913914315 PUSH 8
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
DUP1 DUP1 %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) SWAP1 PUSH 1 SUB %mul_const(100652675408719987021357910538015346127426077519185866739835120963490438734674) ADD
PUSH 25
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
DUP1 %mul_const(91177636130617246552803821781935006617134368061721227770777272682868638699771) SWAP1 PUSH 1 SUB %mul_const(66837770201594535779099350687042404727408598709762866365333192677982385899440) ADD
PUSH 24
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)


%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)

// Then store Q, ±phi(Q), Q ± phi(Q)
%stack (Qneg, Qx, Qy, retdest) -> (4, Qx, 5, Qy, Qx, @SECP_BASE, Qneg, Qx, Qy, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
// stack: Qx, @SECP_BASE, Qx, Qy, retdest
PUSH @SECP_GLV_BETA MULMOD
%stack (betaQx, Qneg, Qx, Qy, retdest) -> (Qneg, Qy, Qneg, betaQx, Qx, Qy, retdest)
MUL SWAP1 PUSH 1 SUB
// stack: 1-Qneg, Qneg*Qy, betaQx, Qx, Qy, retdest
DUP5 PUSH @SECP_BASE SUB MUL ADD
// SUB MUL SWAP1 DUP5 PUSH @SECP_BASE SUB MUL ADD
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
%stack (selectQy, betaQx, Qx, Qy, retdest) -> (2, betaQx, 3, selectQy, betaQx, selectQy, Qx, Qy, precompute_table_base_point_contd, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%stack (selectQy, betaQx, Qx, Qy, retdest) -> (2, betaQx, 3, selectQy, betaQx, selectQy, Qx, Qy, precompute_table_contd, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%jump(ec_add_valid_points_secp)
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
precompute_table_base_point_contd:
precompute_table_contd:
%stack (x, y, retdest) -> (6, x, 7, y, retdest)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH 2
precompute_table_base_point_loop:
// Use a loop to store a*G ± b*phi(G) + c*Q ± d*phi(Q) for a,b,c,d in {0,1}^4.
precompute_table_loop:
// stack: i, retdest
DUP1 %increment %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
DUP1 %increment %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%stack (y, i, retdest) -> (i, y, i, retdest)
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
PUSH precompute_table_base_point_loop_contd
%mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH precompute_table_loop_contd
DUP3 DUP3
PUSH 9 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
PUSH 8 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
// stack: Gx, Gy, x, y, precompute_table_base_point_loop_contd, x, y, i, retdest
PUSH 9 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH 8 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
// stack: Gx, Gy, x, y, precompute_table_loop_contd, x, y, i, retdest
%jump(ec_add_valid_points_secp)
global precompute_table_base_point_loop_contd:
global precompute_table_loop_contd:
wborgeaud marked this conversation as resolved.
Show resolved Hide resolved
%stack (Rx, Ry, x, y, i, retdest) -> (i, 8, Rx, i, 9, Ry, x, y, i, retdest)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
DUP2 DUP2
PUSH 17 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
PUSH 16 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%stack (Gx, Gy, x, y, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_base_point_loop_contd2, x, y, i, retdest)
PUSH 17 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH 16 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%stack (Gx, Gy, x, y, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_loop_contd2, x, y, i, retdest)
%jump(ec_add_valid_points_secp)
precompute_table_base_point_loop_contd2:
precompute_table_loop_contd2:
%stack (Rx, Ry, x, y, i, retdest) -> (i, 16, Rx, i, 17, Ry, x, y, i, retdest)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
PUSH 25 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
PUSH 24 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
%stack (Gx, Gy, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_base_point_loop_contd3, i, retdest)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH 25 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
PUSH 24 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%stack (Gx, Gy, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_loop_contd3, i, retdest)
%jump(ec_add_valid_points_secp)
global precompute_table_base_point_loop_contd3:
global precompute_table_loop_contd3:
%stack (Rx, Ry, i, retdest) -> (i, 24, Rx, i, 25, Ry, i, retdest)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE_G)
ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE)
%add_const(2)
DUP1 %eq_const(8) %jumpi(precompute_table_end_yo)
%jump(precompute_table_base_point_loop)
DUP1 %eq_const(8) %jumpi(precompute_table_end)
%jump(precompute_table_loop)

precompute_table_end_yo:
precompute_table_end:
// stack: i, retdest
POP JUMP
Loading