Skip to content

Commit

Permalink
branch-faster-f25519: add code for non-U64 support
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Harvey committed Jun 11, 2014
1 parent 6144b5d commit f420644
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 6 deletions.
29 changes: 27 additions & 2 deletions python-models/mult.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,30 @@
MASK = 0x1FFFFFFF
NBITS = 29

def mul64_add(u, i1, i2):
    """Return ``u + i1 * i2`` computed with split-word arithmetic.

    The accumulator is handled as two words, ``u == (hi << 30) + lo30``,
    and the product is assembled from the 15-bit low halves and the
    remaining high halves of the operands.  The asserts check that every
    intermediate word stays below 2**32.

    u      -- non-negative accumulator; the result must stay below 2**62
              (otherwise the final ``hi`` assert fires).
    i1, i2 -- digits in the range [0, MASK].
    """
    # The model is for unsigned words; a negative u would silently give
    # Python's infinite two's-complement behaviour for & and >>.
    assert u >= 0
    assert 0 <= i1 <= MASK
    assert 0 <= i2 <= MASK
    lo30 = u & 0x3FFFFFFF
    hi = u >> 30

    # low * low: weight 2**0, at most 30 bits.
    mr = (i1 & 0x7FFF) * (i2 & 0x7FFF)
    lo30 += mr
    # The two cross terms share weight 2**15.
    mr = (i1 & 0x7FFF) * (i2 >> 15)
    mr += (i1 >> 15) * (i2 & 0x7FFF)
    assert mr < (1 << 32)
    # Low 15 bits of the cross sum land in lo30 ...
    lo30 += (mr & 0x7FFF) << 15
    assert lo30 < (1 << 32)
    # ... then anything above bit 29 of lo30 is carried into hi.
    hi += lo30 >> 30
    lo30 &= 0x3FFFFFFF
    # Remaining cross-sum bits have weight 2**30, i.e. belong to hi.
    hi += mr >> 15
    # high * high: weight 2**30.
    mr = (i1 >> 15) * (i2 >> 15)
    hi += mr
    assert hi < (1 << 32)
    return (hi << 30) + lo30


def mul256_rs( wordX, wordY ):
rmax = 0
res = [0] * (NDIGITS * 2)
r64 = 0
for r in range(0, NDIGITS*2-1):
Expand All @@ -17,10 +40,12 @@ def mul256_rs( wordX, wordY ):
sY = (r-NDIGITS+1)
count = 2*NDIGITS-1-r
for i in range(count):
r64 += wordX[sX] * wordY[sY]
r64 = mul64_add(r64, wordX[sX], wordY[sY])
#r64 += wordX[sX] * wordY[sY]
sX -= 1
sY += 1
res[r] = r64 & MASK
rmax = max(rmax, r64)
r64 = (r64 >> NBITS)
assert(r64 <= MASK)
res[NDIGITS*2-1] = r64
Expand Down Expand Up @@ -61,7 +86,7 @@ def mul_mod(x,y):
if res >= P25519:
return res - P25519
return res

tstlist = [ 0, 1, 0x80000000, 0xFFFFFFFF, (1 << 64)-1, (1 << 64),
(1<<128)-1,
P25519-0x80000000,
Expand Down
58 changes: 54 additions & 4 deletions src/f25519mul_mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,68 @@
#define MPIMINI_INTERNAL_API
#include "f25519_mini.h"

#define USE_64BIT
#define USE_64BIT 0

#ifdef USE_64BIT
#if USE_64BIT

/* Native path: the accumulator is a plain 64-bit unsigned integer. */
typedef uint64_t U64;

/* Reset the accumulator to zero. */
#define U64_CLEAR(u) ((u) = 0)
/* Shift the accumulator right by F25519MINI_BITS, in place. */
#define U64_SHIFT_BITS(u) ((u) >>= F25519MINI_BITS)
/* Accumulator ANDed with F25519MINI_BITMASK, returned as int32_t. */
#define U64_MASK(u) ((int32_t)((u) & F25519MINI_BITMASK))
/* u += i1 * i2; i1 is widened to U64 first so the product is 64-bit. */
#define U64_MUL_ADD(u,i1,i2) ((u) += (U64)(i1) * (i2))
/* u += i. */
#define U64_ADD(u,i) ((u) += (i))
/* NOTE(review): identical to the U64_MUL_ADD definition two lines up --
   presumably the old and new sides of a diff; confirm only one is kept. */
#define U64_MUL_ADD(u,i1,i2) ((u) += (U64)(i1) * (i2))

#else

typedef struct
{
    /*
     * Two-word accumulator standing in for a 64-bit integer:
     *     value = (hi << 30) + lo30
     * The helpers below bring lo30 back under 30 bits after each update
     * and move the overflow into hi.
     *
     * The largest value needed is roughly 9 * ((1<<29)-1) * ((1<<29)-1),
     * so 62 bits are enough -- conveniently 30 + 32.
     */
    uint32_t lo30;
    uint32_t hi;
}
U64;

/* Reset both words of the accumulator to zero. */
#define U64_CLEAR(u) ((u).lo30 = (u).hi = 0)

/* Shift the two-word value right by 29 bits, in place. */
static void u64_shift_bits(U64 *u)
{
    /* Only bit 29 of the old low word survives the shift. */
    const uint32_t kept_low = u->lo30 >> 29;
    /* The low 29 bits of hi move down to bits 1..29 of the low word. */
    const uint32_t from_high = (u->hi << 1) & 0x3FFFFFFF;

    u->lo30 = kept_low | from_high;
    u->hi >>= 29;
}
#define U64_SHIFT_BITS(u) u64_shift_bits(&(u))

/* Low word ANDed with F25519MINI_BITMASK, returned as int32_t. */
#define U64_MASK(u) ((int32_t)((u).lo30 & F25519MINI_BITMASK))

/* Add i to the accumulator, carrying out of the 30-bit low word. */
static void u64_add(U64 *u, int32_t i)
{
    const uint32_t low_sum = u->lo30 + (uint32_t)i;

    u->hi += low_sum >> 30;
    u->lo30 = low_sum & 0x3FFFFFFF;
}
#define U64_ADD(u,i) u64_add(&(u), i)

/*
 * Accumulate i1 * i2 into *u using 32-bit arithmetic only.
 * Both factors are expected to be 29-bit values; each is split into a
 * 15-bit low half and the remaining high half, and the four partial
 * products are added at weights 2^0, 2^15 and 2^30.
 */
static void u64_mul_add(U64 *u, int32_t i1, int32_t i2)
{
    const int32_t a_lo = i1 & 0x7FFF;
    const int32_t a_hi = i1 >> 15;
    const int32_t b_lo = i2 & 0x7FFF;
    const int32_t b_hi = i2 >> 15;

    const uint32_t low_prod = (uint32_t)(a_lo * b_lo);
    const uint32_t cross = (uint32_t)(a_lo * b_hi) + (uint32_t)(a_hi * b_lo);
    const uint32_t high_prod = (uint32_t)(a_hi * b_hi);

    /* Low product plus the low 15 bits of the cross terms (weight 2^15). */
    const uint32_t low_sum = u->lo30 + low_prod + ((cross & 0x7FFF) << 15);

    u->lo30 = low_sum & 0x3FFFFFFF;
    /* Carry out of lo30, upper cross-term bits, and the high product. */
    u->hi += (low_sum >> 30) + (cross >> 15) + high_prod;
}
#define U64_MUL_ADD(u,i1,i2) u64_mul_add(&(u),i1,i2)

#endif

static void u64_sum_row( U64 *sum, const int32_t *s_up, const int32_t *s_dn, int count)
{
Expand All @@ -34,7 +85,6 @@ static void u64_sum_row( U64 *sum, const int32_t *s_up, const int32_t *s_dn, int
*sum = acc;
}

#endif

typedef struct
{
Expand Down

0 comments on commit f420644

Please sign in to comment.