-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
510c2bd
commit f1de749
Showing
36 changed files
with
2,754 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
|
||
#include "../utility/rt.h" | ||
#include "../utility/data_size.h" | ||
|
||
/*
 * Matrix dimensions for the 2mm kernel.  Only the ORG configuration assigns
 * values here; the TX/FX/EX branches define nothing in this file.
 */
#ifdef ORG
#define NI 256
#define NJ 256
#define NK 256
#define NL 256
#elif defined (TX)
#elif defined (FX)
#elif defined (EX)
#endif

/*
 * Base element offsets used when reporting accesses through rtTmpAccess.
 * The arrays are numbered back to back in the order tmp, A, B, D, C, so each
 * offset is the sum of the element counts of the arrays placed before it.
 *
 * Every replacement list is parenthesized so the macro expands as a single
 * value inside any surrounding expression (e.g. `2 * B_OFFSET`).
 */
#define TMP_OFFSET 0
#define A_OFFSET (NI * NJ)
#define B_OFFSET (NI * NJ + NI * NK)
#define D_OFFSET (NI * NJ + NI * NK + NJ * NK)
#define C_OFFSET (NI * NJ + NI * NK + NJ * NK + NI * NL)
|
||
|
||
|
||
void mm2_trace(double* tmp, double* A, double* B, double* C, double* D, double alpha, double beta) { | ||
|
||
int i, j, k; | ||
|
||
for (i = 0; i < NI; i++) { | ||
for (j = 0; j < NJ; j++) { | ||
tmp[i * NJ + j] = 0.0; | ||
rtTmpAccess(TMP_OFFSET + i * NJ + j, 0, 0); | ||
for (k = 0; k < NK; ++k) { | ||
tmp[i * NJ + j] += alpha * A[i * NK + k] * B[k * NJ + j]; | ||
rtTmpAccess(A_OFFSET + i * NK + k, 1, 1); | ||
rtTmpAccess(B_OFFSET + k * NJ + j, 2, 2); | ||
rtTmpAccess(TMP_OFFSET + i * NJ + j, 3, 0); | ||
rtTmpAccess(TMP_OFFSET + i * NJ + j, 4, 0); | ||
} | ||
} | ||
} | ||
for (i = 0; i < NI; i++) { | ||
for (j = 0; j < NL; j++) { | ||
D[i * NL + j] *= beta; | ||
rtTmpAccess(D_OFFSET + i * NL + j, 5, 3); | ||
for (k = 0; k < NJ; ++k) { | ||
D[i * NL + j] += tmp[i * NJ + k] * C[k * NL + j]; | ||
rtTmpAccess(TMP_OFFSET + i * NJ + k, 6, 0); | ||
rtTmpAccess(C_OFFSET + k * NL + j, 7, 4); | ||
rtTmpAccess(D_OFFSET + i * NL + j, 8, 3); | ||
rtTmpAccess(D_OFFSET + i * NL + j, 9, 3); | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
||
int main() { | ||
|
||
double* tmp = (double*)malloc( NI * NJ * sizeof(double)); | ||
double* A = (double*)malloc( NI * NK * sizeof(double)); | ||
double* B = (double*)malloc( NK * NJ * sizeof(double)); | ||
double* C = (double*)malloc( NJ * NL * sizeof(double)); | ||
double* D = (double*)malloc( NI * NL * sizeof(double)); | ||
double alpha = 0.1; | ||
double beta = 0.5; | ||
|
||
mm2_trace(tmp, A, B, C, D, alpha, beta); | ||
|
||
dumpSetSize(); | ||
|
||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
|
||
#include "../utility/rt.h" | ||
#include "../utility/data_size.h" | ||
|
||
/*
 * Matrix dimensions for the 3mm kernel.  ORG is the base size; TX, FX and EX
 * enlarge NI and NL to 512, 1024 and 2048 while NJ, NK and NM stay at 256.
 */
#ifdef ORG
#define NI 256
#define NJ 256
#define NL 256
#define NK 256
#define NM 256
#elif defined (TX)
#define NI 512
#define NJ 256
#define NL 512
#define NK 256
#define NM 256
#elif defined (FX)
#define NI 1024
#define NJ 256
#define NL 1024
#define NK 256
#define NM 256
#elif defined (EX)
#define NI 2048
#define NJ 256
#define NL 2048
#define NK 256
#define NM 256
#endif

/*
 * Base element offsets used when reporting accesses through rtTmpAccess.
 * The arrays are numbered back to back in the order A, B, C, D, E, F, G, so
 * each offset is the sum of the element counts of the arrays placed before it.
 *
 * Every replacement list is parenthesized so the macro expands as a single
 * value inside any surrounding expression (e.g. `2 * B_OFFSET`).
 */
#define A_OFFSET 0
#define B_OFFSET (NK * NI)
#define C_OFFSET (NK * NI + NK * NJ)
#define D_OFFSET (NK * NI + NK * NJ + NJ * NM)
#define E_OFFSET (NK * NI + NK * NJ + NJ * NM + NM * NL)
#define F_OFFSET (NK * NI + NK * NJ + NJ * NM + NM * NL + NI * NJ)
#define G_OFFSET (NK * NI + NK * NJ + NJ * NM + NM * NL + NI * NJ + NJ * NL)
|
||
|
||
void mm3_cpu_trace(int *A, int *B, int *C, int *D, int *E, int *F, int *G) { | ||
|
||
int i, j, k; | ||
|
||
/* E := A*B */ | ||
for (i = 0; i < NI; i++) | ||
{ | ||
for (j = 0; j < NJ; j++) | ||
{ | ||
E[i * NJ + j] = 0; | ||
|
||
rtTmpAccess(E_OFFSET + i * NJ + j, 0, 0); | ||
|
||
for (k = 0; k < NK; ++k) | ||
{ | ||
E[i * NJ + j] += A[i * NK + k] * B[k * NJ + j]; | ||
rtTmpAccess(A_OFFSET + i * NK + k, 1, 1); | ||
rtTmpAccess(B_OFFSET + k * NJ + j, 2, 2); | ||
rtTmpAccess(E_OFFSET + i * NJ + j, 3, 0); | ||
rtTmpAccess(E_OFFSET + i * NJ + j, 4, 0); | ||
} | ||
} | ||
} | ||
|
||
/* F := C*D */ | ||
for (i = 0; i < NJ; i++) | ||
{ | ||
for (j = 0; j < NL; j++) | ||
{ | ||
F[i * NL + j] = 0; | ||
|
||
rtTmpAccess(F_OFFSET + i * NL + j, 5, 3); | ||
|
||
for (k = 0; k < NM; ++k) | ||
{ | ||
F[i * NL + j] += C[i * NM + k] * D[k * NL + j]; | ||
rtTmpAccess(C_OFFSET + i * NM + k, 6, 4); | ||
rtTmpAccess(D_OFFSET + k * NL + j, 7, 5); | ||
rtTmpAccess(F_OFFSET + i * NL + j, 8, 3); | ||
rtTmpAccess(F_OFFSET + i * NL + j, 9, 3); | ||
} | ||
} | ||
} | ||
|
||
/* G := E*F */ | ||
for (i = 0; i < NI; i++) | ||
{ | ||
for (j = 0; j < NL; j++) | ||
{ | ||
G[i * NL + j] = 0; | ||
|
||
rtTmpAccess(G_OFFSET + i * NL + j, 10, 6); | ||
|
||
for (k = 0; k < NJ; ++k) | ||
{ | ||
G[i * NL + j] += E[i * NJ + k] * F[k * NL + j]; | ||
rtTmpAccess(E_OFFSET + i * NJ + k, 11, 0); | ||
rtTmpAccess(F_OFFSET + k * NL + j, 12, 3); | ||
rtTmpAccess(G_OFFSET + i * NL + j, 13, 6); | ||
rtTmpAccess(G_OFFSET + i * NL + j, 14, 6); | ||
} | ||
} | ||
} | ||
|
||
return; | ||
} | ||
|
||
|
||
|
||
int main() { | ||
int* a = (int*)malloc( NK * NI * sizeof(int) ); | ||
int* b = (int*)malloc( NK * NJ * sizeof(int) ); | ||
int* c = (int*)malloc( NJ * NM * sizeof(int) ); | ||
int* d = (int*)malloc( NM * NL * sizeof(int) ); | ||
int* e = (int*)malloc( NI * NJ * sizeof(int) ); | ||
int* f = (int*)malloc( NJ * NL * sizeof(int) ); | ||
int* g = (int*)malloc( NI * NL * sizeof(int) ); | ||
|
||
for (int i = 0; i < NK * NI; i++) { | ||
a[i] = i % 256; | ||
} | ||
|
||
for (int i = 0; i < NK * NJ; i++) { | ||
b[i] = i % 256; | ||
} | ||
|
||
for (int i = 0; i < NJ * NM; i++) { | ||
c[i] = i % 256; | ||
} | ||
|
||
for (int i = 0; i < NM * NL; i++) { | ||
d[i] = i % 256; | ||
} | ||
|
||
mm3_cpu_trace(a, b, c, d, e, f, g); | ||
|
||
dumpSetSize(); | ||
|
||
return 0; | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
|
||
#include "../utility/rt.h" | ||
#include "../utility/data_size.h" | ||
|
||
/*
 * Grid size and time-step count for the adi kernel.  Only the ORG
 * configuration assigns values here; the TX/FX/EX branches define nothing in
 * this file.
 */
#ifdef ORG
#define N 1024
#define TSTEPS 10
#elif defined (TX)
#elif defined (FX)
#elif defined (EX)
#endif

/*
 * Base element offsets used when reporting accesses through rtTmpAccess.
 * The four N x N arrays are numbered back to back in the order p, q, v, u.
 *
 * Every replacement list is parenthesized so the macro expands as a single
 * value inside any surrounding expression (e.g. `2 * Q_OFFSET`).
 */
#define P_OFFSET 0
#define Q_OFFSET (N * N)
#define V_OFFSET (N * N + N * N)
#define U_OFFSET (N * N + N * N + N * N)
|
||
void adi_trace(double* p, double* q, double* v, double* u) { | ||
|
||
int t, i, j; | ||
double DX, DY, DT, B1, B2, mul1, mul2, a, b, c, d, e, f; | ||
|
||
DX = 1.0 / N; | ||
DY = 1.0 / N; | ||
DT = 1.0 / TSTEPS; | ||
B1 = 2.0; | ||
B2 = 1.0; | ||
mul1 = B1 * DT / (DX * DX); | ||
mul2 = B2 * DT / (DY * DY); | ||
|
||
a = -mul1 / 2.0; | ||
b = 1.0 + mul1; | ||
c = a; | ||
d = -mul2 / 2.0; | ||
e = 1.0 + mul2; | ||
f = d; | ||
|
||
for (t=1; t <= TSTEPS; t++) { | ||
//Column Sweep | ||
for (i=1; i< N-1; i++) { | ||
v[0 * N + i] = 1.0; | ||
p[i * N + 0] = 0.0; | ||
q[i * N + 0] = v[0 * N + i]; | ||
rtTmpAccess(V_OFFSET + 0 * N + i, 0, 0); | ||
rtTmpAccess(P_OFFSET + i * N + 0, 1, 1); | ||
rtTmpAccess(V_OFFSET + 0 * N + i, 2, 0); | ||
rtTmpAccess(Q_OFFSET + i * N + 0, 3, 2); | ||
for (j=1; j< N-1; j++) { | ||
p[i * N + j] = -c / (a*p[i * N + j-1]+b); | ||
q[i * N + j] = (-d*u[j * N + i-1]+(1.0 + 2.0 * d)*u[j * N + i] - f*u[j * N + i+1]-a*q[i * N + j-1])/(a*p[i * N + j-1]+b); | ||
rtTmpAccess(P_OFFSET + i * N + j-1, 4, 1); | ||
rtTmpAccess(P_OFFSET + i * N + j, 5, 1); | ||
rtTmpAccess(U_OFFSET + j * N + i-1, 6, 3); | ||
rtTmpAccess(U_OFFSET + j * N + i, 7, 3); | ||
rtTmpAccess(U_OFFSET + j * N + i+1, 8, 3); | ||
rtTmpAccess(Q_OFFSET + i * N + j-1, 9, 2); | ||
rtTmpAccess(P_OFFSET + i * N + j-1, 10, 1); | ||
rtTmpAccess(Q_OFFSET + i * N + j, 11, 2); | ||
} | ||
v[(N - 1) * N + i] = 1.0; | ||
rtTmpAccess(V_OFFSET + (N - 1) * N + i, 12, 0); | ||
for (j= N-2; j>=1; j--) { | ||
v[j * N + i] = p[i * N + j] * v[(j+1) * N + i] + q[i * N + j]; | ||
rtTmpAccess(P_OFFSET + i * N + j, 13, 1); | ||
rtTmpAccess(V_OFFSET + (j+1) * N + i, 14, 0); | ||
rtTmpAccess(Q_OFFSET + i * N + j, 15, 2); | ||
rtTmpAccess(V_OFFSET + j * N + i, 16, 0); | ||
} | ||
} | ||
//Row Sweep | ||
for (i=1; i < N - 1; i++) { | ||
u[i * N + 0] = 1.0; | ||
p[i * N + 0] = 0.0; | ||
q[i * N + 0] = u[i * N + 0]; | ||
rtTmpAccess(U_OFFSET + i * N + 0, 17, 3); | ||
rtTmpAccess(P_OFFSET + i * N + 0, 18, 1); | ||
rtTmpAccess(U_OFFSET + i * N + 0, 19, 3); | ||
rtTmpAccess(Q_OFFSET + i * N + 0, 20, 2); | ||
for (j=1; j< N - 1; j++) { | ||
p[i * N + j] = -f / (d*p[i * N + j-1]+e); | ||
q[i * N + j] = (-a*v[(i-1) * N + j]+(1.0 + 2.0 * a)*v[i * N + j] - c*v[(i+1) * N + j]-d*q[i * N + j-1])/(d*p[i * N + j-1]+e); | ||
rtTmpAccess(P_OFFSET + i * N + j-1, 21, 1); | ||
rtTmpAccess(P_OFFSET + i * N + j, 22, 1); | ||
rtTmpAccess(V_OFFSET + (i-1) * N + j, 23, 0); | ||
rtTmpAccess(V_OFFSET + i * N + j, 24, 0); | ||
rtTmpAccess(V_OFFSET + (i+1) * N + j, 25, 0); | ||
rtTmpAccess(Q_OFFSET + i * N + j-1, 26, 2); | ||
rtTmpAccess(P_OFFSET + i * N + j-1, 27, 1); | ||
rtTmpAccess(Q_OFFSET + i * N + j, 28, 2); | ||
} | ||
u[i * N + N - 1 ] = 1.0; | ||
rtTmpAccess(U_OFFSET + i * N + N - 1, 29, 3); | ||
for (j= N - 2; j>=1; j--) { | ||
u[i * N + j] = p[i * N + j] * u[i * N + j+1] + q[i * N + j]; | ||
rtTmpAccess(P_OFFSET + i * N + j, 30, 1); | ||
rtTmpAccess(U_OFFSET + i * N + j+1, 31, 3); | ||
rtTmpAccess(Q_OFFSET + i * N + j, 32, 2); | ||
rtTmpAccess(U_OFFSET + i * N + j, 33, 3); | ||
} | ||
} | ||
} | ||
|
||
} | ||
|
||
int main() { | ||
|
||
double * p = (double *) malloc(N * N * sizeof(double)); | ||
double * q = (double *) malloc(N * N * sizeof(double)); | ||
double * v = (double *) malloc(N * N * sizeof(double)); | ||
double * u = (double *) malloc(N * N * sizeof(double)); | ||
|
||
adi_trace(p, q, v, u); | ||
|
||
dumpSetSize(); | ||
|
||
return 0; | ||
} | ||
|
||
|
Oops, something went wrong.