From f42064484b2ec3a73b3ab697a46ecacfad34a39a Mon Sep 17 00:00:00 2001 From: Ian Harvey Date: Wed, 11 Jun 2014 21:28:14 +0100 Subject: [PATCH] branch-faster-f25519: add code for non-U64 support --- python-models/mult.py | 29 ++++++++++++++++++++-- src/f25519mul_mini.c | 58 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/python-models/mult.py b/python-models/mult.py index 7a50292..7de430b 100644 --- a/python-models/mult.py +++ b/python-models/mult.py @@ -4,7 +4,30 @@ MASK = 0x1FFFFFFF NBITS = 29 +def mul64_add( u, i1, i2 ): + assert(i1 >=0 and i1 <= MASK) + assert(i2 >=0 and i2 <= MASK) + lo30 = u & 0x3FFFFFFF + hi = u >> 30 + + mr = (i1 & 0x7FFF) * (i2 & 0x7FFF) + lo30 += mr + mr = (i1 & 0x7FFF)*(i2 >> 15) + mr += (i1 >> 15)*(i2 & 0x7FFF) + assert(mr < (1<<32)) + lo30 += (mr & 0x7FFF) << 15 + assert(lo30 < (1<<32)) + hi += (lo30 >> 30) + lo30 &= 0x3FFFFFFF + hi += (mr >> 15) + mr = (i1 >> 15) * (i2 >> 15) + hi += mr + assert(hi < (1<<32)) + return (hi<<30) + lo30 + + def mul256_rs( wordX, wordY ): + rmax = 0 res = [0] * (NDIGITS * 2) r64 = 0 for r in range(0, NDIGITS*2-1): @@ -17,10 +40,12 @@ def mul256_rs( wordX, wordY ): sY = (r-NDIGITS+1) count = 2*NDIGITS-1-r for i in range(count): - r64 += wordX[sX] * wordY[sY] + r64 = mul64_add(r64, wordX[sX], wordY[sY]) + #r64 += wordX[sX] * wordY[sY] sX -= 1 sY += 1 res[r] = r64 & MASK + rmax = max(rmax, r64) r64 = (r64 >> NBITS) assert(r64 <= MASK) res[NDIGITS*2-1] = r64 @@ -61,7 +86,7 @@ def mul_mod(x,y): if res >= P25519: return res - P25519 return res - + tstlist = [ 0, 1, 0x80000000, 0xFFFFFFFF, (1 << 64)-1, (1 << 64), (1<<128)-1, P25519-0x80000000, diff --git a/src/f25519mul_mini.c b/src/f25519mul_mini.c index af0521a..257392e 100644 --- a/src/f25519mul_mini.c +++ b/src/f25519mul_mini.c @@ -10,17 +10,68 @@ #define MPIMINI_INTERNAL_API #include "f25519_mini.h" -#define USE_64BIT +#define USE_64BIT 0 -#ifdef USE_64BIT +#if USE_64BIT typedef uint64_t U64; #define 
U64_CLEAR(u) ((u) = 0) #define U64_SHIFT_BITS(u) ((u) >>= F25519MINI_BITS) #define U64_MASK(u) ((int32_t)((u) & F25519MINI_BITMASK)) -#define U64_MUL_ADD(u,i1,i2) ((u) += (U64)(i1) * (i2)) #define U64_ADD(u,i) ((u) += (i)) +#define U64_MUL_ADD(u,i1,i2) ((u) += (U64)(i1) * (i2)) + +#else + +typedef struct +{ + /* Actual max value here is ~ 9 * ((1<<29)-1) * ((1<<29)-1) + i.e. 62 bits will do. This turns out to be handy! */ + uint32_t lo30; + uint32_t hi; +} + U64; + +#define U64_CLEAR(u) ((u).lo30 = (u).hi = 0) + +static void u64_shift_bits(U64 *u) +{ + u->lo30 = (u->lo30 >> 29) | ((u->hi << 1) & 0x3FFFFFFF); + u->hi >>= 29; +} +#define U64_SHIFT_BITS(u) u64_shift_bits(&(u)) + +#define U64_MASK(u) ((int32_t)((u).lo30 & F25519MINI_BITMASK)) + +static void u64_add(U64 *u, int32_t i) +{ + u->lo30 += i; + u->hi += (u->lo30 >> 30); + u->lo30 &= 0x3FFFFFFF; +} +#define U64_ADD(u,i) u64_add(&(u), i) + +static void u64_mul_add( U64 *u, int32_t i1, int32_t i2) +{ + /* i1 and i2 are both 29 bits. */ + uint32_t mr; + + mr = (i1 & 0x7FFF) * (i2 & 0x7FFF); + u->lo30 += mr; + + mr = (i1 & 0x7FFF)*(i2 >> 15); + mr += (i1 >> 15)*(i2 & 0x7FFF); + u->lo30 += (mr & 0x7FFF) << 15; + u->hi += (u->lo30 >> 30); + u->lo30 &= 0x3FFFFFFF; + u->hi += (mr >> 15); + mr = (i1 >> 15) * (i2 >> 15); + u->hi += mr; +} +#define U64_MUL_ADD(u,i1,i2) u64_mul_add(&(u),i1,i2) + +#endif static void u64_sum_row( U64 *sum, const int32_t *s_up, const int32_t *s_dn, int count) { @@ -34,7 +85,6 @@ static void u64_sum_row( U64 *sum, const int32_t *s_up, const int32_t *s_dn, int *sum = acc; } -#endif typedef struct {