-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cblas_sgmem segfaults using big matrixes #504
Comments
test with mingw-w64 (64bit) and symbol tables #ifdef _WIN32
#include <windows.h>
#else
#include <sys/time.h>
#include <linux/limits.h>
#endif
#include <malloc.h>
#include <stdio.h>
#include <stdint.h>
#include <cblas.h>
#define ALIGNMET_SIZE 16
#ifdef __MINGW32__
#define _aligned_malloc __mingw_aligned_malloc
#define _aligned_free __mingw_aligned_free
#endif //MINGW
typedef struct Matrix {
float *matrix;
uint32_t rows;
uint32_t columns;
} Matrix;
Matrix makeInputMatrix(int rows, int cols) {
Matrix matrix;
matrix.rows = rows;
matrix.columns = cols;
int64_t expectedSize = (matrix.rows*matrix.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
//matrix.matrix = (float*)blas_memory_alloc(expectedSize);
#if defined (_WIN32)
matrix.matrix = (float*)_aligned_malloc(expectedSize, ALIGNMET_SIZE);
#else
matrix.matrix = (float*)memalign(ALIGNMET_SIZE, expectedSize);
#endif
//srandom(getpid());
for(int i = 0; i < rows; i++) {
for(int j = 0; j < cols; j++) {
matrix.matrix[i*cols + j] = abs((float)rand() / (float)RAND_MAX - 0.5f);
}
}
return matrix;
}
Matrix multMatrixes(Matrix &a, Matrix &b, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, float alpha, float betta) {
Matrix c ;
c.rows = a.rows;
c.columns = b.columns;
int64_t expectedSize = (c.rows * c.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
//c.matrix = (float*)blas_memory_alloc(expectedSize);
#if defined (_WIN32)
c.matrix = (float*)_aligned_malloc(expectedSize, ALIGNMET_SIZE);
#else
c.matrix = (float*)memalign(ALIGNMET_SIZE, expectedSize);
#endif
fprintf(stderr, "Start address for matrix C [%p]\n", c.matrix);
cblas_sgemm(CblasRowMajor, transA, transB, a.rows, b.columns, a.columns,
alpha, a.matrix, a.columns, b.matrix,a.rows, betta, c.matrix, c.rows);
return c;
}
int main(){
//test
Matrix A=makeInputMatrix(2048,1320);
Matrix B=makeInputMatrix(1320,64);
//segFault for bigger sizes
Matrix C=multMatrixes(A,B,CblasNoTrans,CblasNoTrans,1.0f,0);
//...freeing mem
///...///
return 0;
} g++ -g -I/<PATH>/include -L/<PATH>/lib ob504.c++ -lopenblas
|
Possibly related to resolved issue 218. |
Thx guys for the help.Today we checked sgemm function. Openblas worked well. Problem was in |
Problem
Program segfaults when multiplying big matrixes. We also allocated memory with blas_memory_alloc. It gave another problem stating "too many memory regions". We also checked issue #85. But nothing worked. hope You guide us.
thxx
here is test code;
valgrind detects:
_Start address for matrix C [0x718ad70]_
_==21152== Invalid write of size 4_
==21152== at 0x50C3057: sgemm_beta (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x4F6BDF1: sgemm_nn (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x4EB4D05: cblas_sgemm (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x404C1A: multMatrixes(Matrix&, Matrix&, CBLAS_TRANSPOSE, CBLAS_TRANSPOSE, float, float) (main.cpp:93)
==21152== by 0x404CA0: main (main.cpp:106)
==21152== Address 0x720ad80 is 0 bytes after a block of size 524,304 alloc'd*
==21152== at 0x4C29BE2: memalign (vg_replace_malloc.c:694)
==21152== by 0x404B83: multMatrixes(Matrix&, Matrix&, CBLAS_TRANSPOSE, CBLAS_TRANSPOSE, float, float) (main.cpp:90)
==21152== by 0x404CA0: main (main.cpp:106)
Cpu_info:
Intel® Core™2 Quad CPU Q8200 @ 2.33GHz × 4
Penryn (microarchitecture)
openblas was build using
make BINARY=64 INTERFACE=64 USE_OPENMP=1
The text was updated successfully, but these errors were encountered: