Skip to content

Commit

Permalink
Performance increase
Browse files Browse the repository at this point in the history
-Performance increase when searching compressed keys
-Fixed broken build on Linux
-Removed unused code
  • Loading branch information
brichard19 committed Jun 24, 2018
1 parent 85b9c64 commit f3a9a7d
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 80 deletions.
2 changes: 1 addition & 1 deletion KeyFinder/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ CPPSRC:=$(wildcard *.cpp)
all:
${CXX} -o KeyFinder.bin ${CPPSRC} ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS} ${LIBS} -L${CUDA_LIB} -lkeyfinder -laddressutil -lsecp256k1 -lcryptoutil -lsecp256k1 -lcudadevicecontext -lcudautil -lutil -lcudart -lcmdparse
mkdir -p $(BINDIR)
cp KeyFinder.bin $(BINDIR)/keyFinder
cp KeyFinder.bin $(BINDIR)/BitCrack

clean:
rm -rf KeyFinder.bin
4 changes: 2 additions & 2 deletions KeyFinder/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ static std::string _outputFile = "";
*/
void resultCallback(KeyFinderResultInfo info)
{
printf("\r");
printf("\n");
printf("Address: %s\n", info.address.c_str());
printf("Private key: %s\n", info.privateKey.toString(16).c_str());
printf("Compressed: %s\n", info.compressed ? "yes" : "no");
printf("Public key: ");
if(info.compressed) {
printf("%s\n", info.publicKey.toString(true).c_str());
} else {
printf("%s\n %s\n", info.publicKey.x.toString(16).c_str(), info.publicKey.y.toString(16).c_str());
printf("%s\n %s\n", info.publicKey.x.toString(16).c_str(), info.publicKey.y.toString(16).c_str());
}

if(_outputFile.length() != 0) {
Expand Down
2 changes: 2 additions & 0 deletions KeyFinderLib/KeyFinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ void KeyFinder::init()

_devCtx->init(params);

cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);

// Copy points to device
generateStartingPoints();
_devCtx->copyPoints(_startingPoints);
Expand Down
28 changes: 19 additions & 9 deletions KeyFinderLib/KeyFinder.cu
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,11 @@ __device__ void hashPublicKey(const unsigned int *x, const unsigned int *y, unsi
ripemd160sha256NoFinal(hash, digestOut);
}

__device__ void hashPublicKeyCompressed(const unsigned int *x, const unsigned int *y, unsigned int *digestOut)
__device__ void hashPublicKeyCompressed(const unsigned int *x, unsigned int yParity, unsigned int *digestOut)
{
unsigned int hash[8];

sha256PublicKeyCompressed(x, y, hash);
sha256PublicKeyCompressed(x, yParity, hash);

// Swap to little-endian
for(int i = 0; i < 8; i++) {
Expand Down Expand Up @@ -249,13 +249,16 @@ __device__ void doIteration(unsigned int *xPtr, unsigned int *yPtr, unsigned int
unsigned int inverse[8] = { 0,0,0,0,0,0,0,1 };
for(int i = 0; i < pointsPerThread; i++) {
unsigned int x[8];
unsigned int y[8];

unsigned int digest[5];

readInt(xPtr, i, x);
readInt(yPtr, i, y);


if(compression == PointCompressionType::UNCOMPRESSED || compression == PointCompressionType::BOTH) {
unsigned int y[8];
readInt(yPtr, i, y);

hashPublicKey(x, y, digest);

if(checkHash(digest)) {
Expand All @@ -264,14 +267,16 @@ __device__ void doIteration(unsigned int *xPtr, unsigned int *yPtr, unsigned int
}

if(compression == PointCompressionType::COMPRESSED || compression == PointCompressionType::BOTH) {
hashPublicKeyCompressed(x, y, digest);
hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);

if(checkHash(digest)) {
unsigned int y[8];
readInt(yPtr, i, y);
setResultFound(numResults, results, i, true, x, y, digest);
}
}

beginBatchAdd(_INC_X, xPtr, chain, i, inverse);
beginBatchAdd(_INC_X, x, chain, i, inverse);
}

doBatchInverse(inverse);
Expand All @@ -294,14 +299,16 @@ __device__ void doIterationWithDouble(unsigned int *xPtr, unsigned int *yPtr, un
unsigned int inverse[8] = { 0,0,0,0,0,0,0,1 };
for(int i = 0; i < pointsPerThread; i++) {
unsigned int x[8];
unsigned int y[8];

unsigned int digest[5];

readInt(xPtr, i, x);
readInt(yPtr, i, y);


// uncompressed
if(compression == 1 || compression == 2) {
unsigned int y[8];
readInt(yPtr, i, y);
hashPublicKey(x, y, digest);

if(checkHash(digest)) {
Expand All @@ -311,9 +318,12 @@ __device__ void doIterationWithDouble(unsigned int *xPtr, unsigned int *yPtr, un

// compressed
if(compression == 0 || compression == 2) {
hashPublicKeyCompressed(x, y, digest);
hashPublicKeyCompressed(x, readIntLSW(yPtr, i), digest);

if(checkHash(digest)) {
unsigned int y[8];
readInt(yPtr, i, y);

setResultFound(numResults, results, i, true, x, y, digest);
}
}
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export CUDA_INCLUDE
export CUDA_MATH


all: dir_addressutil dir_cmdparse dir_cryptoutil dir_keyfinderlib dir_keyfinder dir_cudadevicecontext dir_cudautil dir_secp256k1lib dir_util
all: dir_addressutil dir_cmdparse dir_cryptoutil dir_keyfinderlib dir_keyfinder dir_cudadevicecontext dir_cudautil dir_secp256k1lib dir_util dir_cudainfo

dir_addressutil: dir_util dir_secp256k1lib dir_cryptoutil
make --directory AddressUtil
Expand Down Expand Up @@ -65,6 +65,8 @@ dir_secp256k1lib: dir_cryptoutil
dir_util:
make --directory util

dir_cudainfo:
make --directory cudaInfo

clean:
make --directory AddressUtil clean
Expand All @@ -76,6 +78,7 @@ clean:
make --directory cudaUtil clean
make --directory secp256k1lib clean
make --directory util clean
make --directory cudaInfo clean

rm -rf ${LIBDIR}
rm -rf ${BINDIR}
9 changes: 9 additions & 0 deletions cudaInfo/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Build rules for the cudainfo tool.
#
# NOTE(review): CXX, INCLUDE, CUDA_INCLUDE, CXXFLAGS, LIBS, CUDA_LIB and BINDIR
# are not defined in this file; they are presumably exported by the top-level
# Makefile that invokes this one via `make --directory cudaInfo` -- confirm
# before running this Makefile standalone.

# Every .cpp file in this directory is compiled into the tool.
CPPSRC:=$(wildcard *.cpp)

# Compile and link all sources into cudainfo.bin (linking against cudautil and
# cudart), make sure $(BINDIR) exists, then copy the binary there as "cudainfo".
all:
${CXX} -o cudainfo.bin ${CPPSRC} ${INCLUDE} -I${CUDA_INCLUDE} ${CXXFLAGS} ${LIBS} -L${CUDA_LIB} -lcudautil -lcudart
mkdir -p $(BINDIR)
cp cudainfo.bin $(BINDIR)/cudainfo

# Remove the locally built binary only; the copy in $(BINDIR) is not touched here.
clean:
rm -rf cudainfo.bin
84 changes: 21 additions & 63 deletions cudaMath/secp256k1.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#include "ptx.cuh"


__device__ __forceinline__ void copyBigInt(const unsigned int *src, unsigned int *dest);


/**
Prime modulus 2^256 - 2^32 - 977
*/
Expand Down Expand Up @@ -50,7 +47,7 @@ __constant__ unsigned int _LAMBDA[8] = {
};


__device__ __forceinline__ void copyBigInt(const unsigned int *src, unsigned int *dest)
__device__ __forceinline__ void copyBigInt(const unsigned int src[8], unsigned int dest[8])
{
for(int i = 0; i < 8; i++) {
dest[i] = src[i];
Expand Down Expand Up @@ -97,6 +94,19 @@ __device__ void readInt(const unsigned int *ara, int idx, unsigned int x[8])
}
}

/**
 * Reads a single 32-bit word -- word 7, the least-significant word -- of the
 * idx-th 8-word big integer belonging to the calling thread, without loading
 * the other seven words.
 *
 * Layout implied by the index arithmetic: integer number idx occupies a slab
 * of (totalThreads * 8) words starting at idx * totalThreads * 8; inside a
 * slab, word w of the calling thread lives at offset w * totalThreads + threadId.
 * Only the .x components of gridDim/blockDim/blockIdx/threadIdx are used.
 *
 * @param ara  base pointer of the interleaved big-integer array
 * @param idx  index of the big integer (per thread) to read from
 * @return     word 7 of the selected integer for the calling thread
 *
 * The offset is computed in 64-bit unsigned arithmetic: the product
 * idx * totalThreads * 8 exceeds the range of a 32-bit signed int once the
 * array holds more than 2^31 words, and signed overflow is undefined behaviour.
 */
__device__ unsigned int readIntLSW(const unsigned int *ara, int idx)
{
    // Widen before multiplying so no intermediate product is truncated.
    const unsigned long long totalThreads = (unsigned long long)gridDim.x * blockDim.x;
    const unsigned long long threadId = (unsigned long long)blockDim.x * blockIdx.x + threadIdx.x;

    // Start of the slab that holds integer number idx for every thread.
    const unsigned long long base = (unsigned long long)idx * totalThreads * 8ULL;

    // Word 7 of this thread sits seven word-rows into the slab.
    return ara[base + threadId + totalThreads * 7ULL];
}

/**
* Writes an 8-word big integer to device memory
*/
Expand Down Expand Up @@ -179,50 +189,6 @@ __device__ unsigned int sub(const unsigned int a[8], const unsigned int b[8], un
return (borrow & 0x01);
}

/**
Subtract using two's complement: computes c = a + ~b + 1 (i.e. a - b) on
8-word integers. Both carry chains start at index 7 and end at index 0, so
index 7 is treated as the least-significant word.

NOTE(review): add_cc / addc_cc / addc are not defined in this view; they are
presumably the carry-flag add wrappers from ptx.cuh (add_cc: add and set carry,
addc_cc: add with carry-in and set carry, addc: add with carry-in) -- confirm.
The statement order matters because each call is assumed to consume the carry
left by the previous one.

Returns the carry captured after the first pass plus the carry out of the
second pass (under the assumption above).
*/
__device__ unsigned int sub2c(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
{
// Pass 1: c = a + ~b, word by word from index 7 down to index 0.
add_cc(c[7], a[7], ~b[7]);
addc_cc(c[6], a[6], ~b[6]);
addc_cc(c[5], a[5], ~b[5]);
addc_cc(c[4], a[4], ~b[4]);
addc_cc(c[3], a[3], ~b[3]);
addc_cc(c[2], a[2], ~b[2]);
addc_cc(c[1], a[1], ~b[1]);
addc_cc(c[0], a[0], ~b[0]);

// Capture the carry out of pass 1 (0 + 0 + carry-in, assuming addc adds the carry flag).
unsigned int carry = 0;
addc(carry, 0, 0);

// Pass 2: c = c + 1, propagating the carry through all eight words.
add_cc(c[7], c[7], 1);
addc_cc(c[6], c[6], 0);
addc_cc(c[5], c[5], 0);
addc_cc(c[4], c[4], 0);
addc_cc(c[3], c[3], 0);
addc_cc(c[2], c[2], 0);
addc_cc(c[1], c[1], 0);
addc_cc(c[0], c[0], 0);

// Fold the carry out of pass 2 into the value returned to the caller.
addc(carry, carry, 0);

return carry;
}

/**
 * Shifts an 8-word integer right by one bit, in place.
 *
 * Bits move from x[0] towards x[7]: the lowest bit of each word becomes the
 * highest bit of the next word, a zero is shifted into the top of x[0], and
 * the lowest bit of x[7] is discarded.
 *
 * @param x  8-word integer, modified in place
 */
__device__ void divBy2(unsigned int x[8])
{
    // Walk from the last word towards the first so every word still sees the
    // unshifted value of the word before it.
    for(int word = 7; word > 0; word--) {
        const unsigned int carriedBit = x[word - 1] << 31;
        x[word] = (x[word] >> 1) | carriedBit;
    }

    // Nothing precedes the first word, so it only shifts.
    x[0] >>= 1;
}



__device__ void addModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
{
Expand Down Expand Up @@ -262,19 +228,15 @@ __device__ void addModP(const unsigned int a[8], const unsigned int b[8], unsign



/**
* Multiplication mod P
*/
__device__ void mulModP(const unsigned int a[8], const unsigned int b[8], unsigned int c[8])
{
unsigned int high[8];
unsigned int high[8] = { 0 };

unsigned int t = a[7];

// a[7] * b (low)
for (int i = 7; i >= 0; i--) {
for(int i = 7; i >= 0; i--) {
c[i] = t * b[i];
high[i] = 0;
}

// a[7] * b (high)
Expand Down Expand Up @@ -311,8 +273,6 @@ __device__ void mulModP(const unsigned int a[8], const unsigned int b[8], unsign
madc_hi_cc(high[7], t, b[1], high[7]);
madc_hi(high[6], t, b[0], high[6]);



// a[5] * b (low)
t = a[5];
mad_lo_cc(c[5], t, b[7], c[5]);
Expand Down Expand Up @@ -460,7 +420,7 @@ __device__ void mulModP(const unsigned int a[8], const unsigned int b[8], unsign
// At this point we have 16 32-bit words representing a 512-bit value
// high[0 ... 7] and c[0 ... 7]
const unsigned int s = 977;

// Store high[6] and high[7] since they will be overwritten
unsigned int high7 = high[7];
unsigned int high6 = high[6];
Expand Down Expand Up @@ -676,17 +636,15 @@ __device__ void negModP(const unsigned int *value, unsigned int *negative)
}


__device__ __forceinline__ void beginBatchAdd(const unsigned int *px, unsigned int *xPtr, unsigned int *chain, int i, unsigned int inverse[8])
__device__ __forceinline__ void beginBatchAdd(const unsigned int *px, const unsigned int *x, unsigned int *chain, int i, unsigned int inverse[8])
{
unsigned int x[8];
readInt(xPtr, i, x);

// x = Gx - x
subModP(px, x, x);
unsigned int t[8];
subModP(px, x, t);

// Keep a chain of multiples of the diff, i.e. c[0] = diff0, c[1] = diff0 * diff1,
// c[2] = diff2 * diff1 * diff0, etc
mulModP(x, inverse);
mulModP(t, inverse);

writeInt(chain, i, inverse);
}
Expand Down
4 changes: 2 additions & 2 deletions cudaMath/sha256.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,13 @@ __device__ void sha256PublicKey(const unsigned int x[8], const unsigned int y[8]
digest[7] = tmp[7] + h;
}

__device__ void sha256PublicKeyCompressed(const unsigned int x[8], const unsigned int y[8], unsigned int digest[8])
__device__ void sha256PublicKeyCompressed(const unsigned int x[8], unsigned int yParity, unsigned int digest[8])
{
unsigned int a, b, c, d, e, f, g, h;
unsigned int w[16];

// 0x03 || x or 0x02 || x
w[0] = 0x02000000 | ((y[7] & 1) << 24) | (x[0] >> 8);
w[0] = 0x02000000 | ((yParity & 1) << 24) | (x[0] >> 8);

w[1] = (x[1] >> 8) | (x[0] << 24);
w[2] = (x[2] >> 8) | (x[1] << 24);
Expand Down
5 changes: 3 additions & 2 deletions util/util.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include<stdio.h>
#include<string>
#include<fstream>
#include<vector>
#include<set>
Expand Down Expand Up @@ -181,7 +182,7 @@ namespace util {

bool readLinesFromStream(const std::string &fileName, std::vector<std::string> &lines)
{
std::ifstream inFile(fileName);
std::ifstream inFile(fileName.c_str());

if(!inFile.is_open()) {
return false;
Expand Down Expand Up @@ -212,7 +213,7 @@ namespace util {
newline = true;
}

outFile.open(fileName, std::ios::app);
outFile.open(fileName.c_str(), std::ios::app);

if(!outFile.is_open()) {
return false;
Expand Down

0 comments on commit f3a9a7d

Please sign in to comment.