Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cblas_sgemm segfaults using big matrices #504

Closed
quickwritereader opened this issue Feb 17, 2015 · 3 comments
Closed

cblas_sgemm segfaults using big matrices #504

quickwritereader opened this issue Feb 17, 2015 · 3 comments

Comments

@quickwritereader
Copy link
Contributor

Problem

The program segfaults when multiplying big matrices. We also tried allocating memory with blas_memory_alloc, but that produced a different error: "too many memory regions". We also checked issue #85, but nothing worked. We hope you can guide us.
Thanks.

Here is the test code:

//......
#define ALIGNMET_SIZE 16
// Float matrix: a raw element buffer plus its dimensions.
typedef struct Matrix {
   float *matrix;     // element buffer; makeInputMatrix fills it as [i*cols + j]
   uint32_t rows;     // number of rows
   uint32_t columns;  // number of columns

} Matrix;


// Allocates a rows x cols matrix with memalign and fills every element,
// in row-major order (index i*cols + j), with a value derived from rand().
// The struct is returned by value; the buffer is not freed here.
Matrix makeInputMatrix(int rows, int cols) {
    Matrix matrix;
    matrix.rows = rows;
    matrix.columns = cols;
    // Byte count of the elements, plus ALIGNMET_SIZE, with the low bits masked off:
    // the result is a multiple of ALIGNMET_SIZE that is at least the raw byte count.
    // NOTE(review): rows*columns is multiplied as uint32_t before being widened by
    // sizeof(float), so the product can wrap for very large dimensions where
    // unsigned int is 32 bits.
    int64_t expectedSize = (matrix.rows*matrix.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
    //matrix.matrix = (float*)blas_memory_alloc(expectedSize);
    // NOTE(review): the memalign result is used below without a NULL check.
    matrix.matrix = (float*)memalign(ALIGNMET_SIZE,  expectedSize);
    // NOTE(review): this seeds through srandom() while values are drawn with rand();
    // POSIX pairs srandom() with random() and srand() with rand() - confirm the seed
    // actually affects rand() on the target libc.
    srandom(getpid());
    for(int i = 0; i < rows; i++) {
        for(int j = 0; j < cols; j++) {
            // rand()/RAND_MAX - 0.5f lies in [-0.5, 0.5]; abs() is meant to fold it to [0, 0.5].
            // NOTE(review): if abs resolves to the int version here, the argument is
            // truncated to 0 - confirm which overload is picked (fabsf is unambiguous).
            matrix.matrix[i*cols + j] = abs((float)rand() / (float)RAND_MAX - 0.5f);
        }
    }
    return matrix;
}

// Allocates an a.rows x b.columns result matrix and calls cblas_sgemm in
// CblasRowMajor mode with M = a.rows, N = b.columns, K = a.columns, scaling
// factors alpha / betta and the transpose flags given by the caller.
// Prints the address of the result buffer to stderr before the call.
// The struct is returned by value; the buffer is not freed here.
Matrix multMatrixes(Matrix &a, Matrix &b, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, float alpha, float betta) {
    Matrix c  ;
    c.rows = a.rows;
    c.columns = b.columns;
    // Same size computation as makeInputMatrix: element bytes plus ALIGNMET_SIZE,
    // masked down to a multiple of ALIGNMET_SIZE.
    int64_t expectedSize = (c.rows * c.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
    //c.matrix = (float*)blas_memory_alloc(expectedSize);
    // NOTE(review): the memalign result is passed to cblas_sgemm without a NULL check.
    c.matrix = (float*)memalign(ALIGNMET_SIZE, expectedSize);
    fprintf(stderr, "Start address for matrix C [%p]\n", c.matrix);
    // Leading dimensions passed here: lda = a.columns, ldb = a.rows, ldc = c.rows.
    // NOTE(review): for row-major, non-transposed operands the leading dimension is the
    // row length, which would be b.columns for B and c.columns for C rather than
    // a.rows / c.rows. With ldc = c.rows the routine strides past the end of the
    // a.rows x b.columns buffer whenever c.rows > c.columns - confirm against the
    // CBLAS reference; this is the likely cause of the reported invalid write.
    cblas_sgemm(CblasRowMajor, transA, transB, a.rows, b.columns, a.columns, 
            alpha, a.matrix, a.columns, b.matrix,a.rows, betta, c.matrix, c.rows);
    return c;
}



// Reproducer driver: builds a 2048x1320 matrix A and a 1320x64 matrix B, then
// multiplies them without transposition using alpha = 1 and beta = 0.
int main(){ 
  //test
    Matrix A=makeInputMatrix(2048,1320);
    Matrix B=makeInputMatrix(1320,64);
    //segFault for bigger sizes 
    Matrix C=multMatrixes(A,B,CblasNoTrans,CblasNoTrans,1.0f,0);

    // Cleanup was elided by the reporter; the three buffers are not freed in this excerpt.
    //...freeing mem  
    ///...///
    return 0;
}

valgrind detects:

_Start address for matrix C [0x718ad70]_
_==21152== Invalid write of size 4_
==21152== at 0x50C3057: sgemm_beta (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x4F6BDF1: sgemm_nn (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x4EB4D05: cblas_sgemm (in /usr/lib/libopenblas_penrynp-r0.2.13.so)
==21152== by 0x404C1A: multMatrixes(Matrix&, Matrix&, CBLAS_TRANSPOSE, CBLAS_TRANSPOSE, float, float) (main.cpp:93)
==21152== by 0x404CA0: main (main.cpp:106)
==21152== Address 0x720ad80 is 0 bytes after a block of size 524,304 alloc'd*
==21152== at 0x4C29BE2: memalign (vg_replace_malloc.c:694)
==21152== by 0x404B83: multMatrixes(Matrix&, Matrix&, CBLAS_TRANSPOSE, CBLAS_TRANSPOSE, float, float) (main.cpp:90)
==21152== by 0x404CA0: main (main.cpp:106)

Cpu_info:

Intel® Core™2 Quad CPU Q8200 @ 2.33GHz × 4
Penryn (microarchitecture)

OpenBLAS was built using

make BINARY=64 INTERFACE=64 USE_OPENMP=1

@carlkl
Copy link

carlkl commented Feb 17, 2015

test with mingw-w64 (64bit) and symbol tables

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/time.h>
#include <linux/limits.h>
#endif

#include <malloc.h>
#include <stdio.h>
#include <stdint.h>
#include <cblas.h>
#define ALIGNMET_SIZE 16

#ifdef __MINGW32__
#define _aligned_malloc __mingw_aligned_malloc
#define _aligned_free  __mingw_aligned_free
#endif //MINGW

// Float matrix: a raw element buffer plus its dimensions.
typedef struct Matrix {
   float *matrix;     // element buffer; makeInputMatrix fills it as [i*cols + j]
   uint32_t rows;     // number of rows
   uint32_t columns;  // number of columns

} Matrix;

// Allocates a rows x cols matrix (_aligned_malloc on Windows, memalign elsewhere)
// and fills every element, in row-major order (index i*cols + j), with a value
// derived from rand(). The struct is returned by value; the buffer is not freed here.
Matrix makeInputMatrix(int rows, int cols) {
    Matrix matrix;
    matrix.rows = rows;
    matrix.columns = cols;
    // Byte count of the elements, plus ALIGNMET_SIZE, with the low bits masked off:
    // the result is a multiple of ALIGNMET_SIZE that is at least the raw byte count.
    // NOTE(review): rows*columns is multiplied as uint32_t before being widened by
    // sizeof(float), so the product can wrap for very large dimensions where
    // unsigned int is 32 bits.
    int64_t expectedSize = (matrix.rows*matrix.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
    //matrix.matrix = (float*)blas_memory_alloc(expectedSize);
    // Note the argument order differs between the two allocators:
    // _aligned_malloc(size, alignment) versus memalign(alignment, size).
    // NOTE(review): neither result is checked for NULL before use.
    #if defined (_WIN32)
        matrix.matrix = (float*)_aligned_malloc(expectedSize, ALIGNMET_SIZE);
    #else
        matrix.matrix = (float*)memalign(ALIGNMET_SIZE,  expectedSize);
    #endif
    // Seeding is disabled in this variant, so rand() runs from its default seed.
    //srandom(getpid());
    for(int i = 0; i < rows; i++) {
        for(int j = 0; j < cols; j++) {
            // rand()/RAND_MAX - 0.5f lies in [-0.5, 0.5]; abs() is meant to fold it to [0, 0.5].
            // NOTE(review): if abs resolves to the int version here, the argument is
            // truncated to 0 - confirm which overload is picked (fabsf is unambiguous).
            matrix.matrix[i*cols + j] = abs((float)rand() / (float)RAND_MAX - 0.5f);
        }
    }
    return matrix;
}

// Allocates an a.rows x b.columns result matrix and calls cblas_sgemm in
// CblasRowMajor mode with M = a.rows, N = b.columns, K = a.columns, scaling
// factors alpha / betta and the transpose flags given by the caller.
// Prints the address of the result buffer to stderr before the call.
// The struct is returned by value; the buffer is not freed here.
Matrix multMatrixes(Matrix &a, Matrix &b, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, float alpha, float betta) {
    Matrix c  ;
    c.rows = a.rows;
    c.columns = b.columns;
    // Same size computation as makeInputMatrix: element bytes plus ALIGNMET_SIZE,
    // masked down to a multiple of ALIGNMET_SIZE.
    int64_t expectedSize = (c.rows * c.columns * sizeof(float) + ALIGNMET_SIZE)&(~(ALIGNMET_SIZE - 1));
    //c.matrix = (float*)blas_memory_alloc(expectedSize);
    // NOTE(review): neither allocator result is checked for NULL before the sgemm call.
    #if defined (_WIN32)
        c.matrix = (float*)_aligned_malloc(expectedSize, ALIGNMET_SIZE);
    #else
        c.matrix = (float*)memalign(ALIGNMET_SIZE, expectedSize);
    #endif
    fprintf(stderr, "Start address for matrix C [%p]\n", c.matrix);
    // Leading dimensions passed here: lda = a.columns, ldb = a.rows, ldc = c.rows.
    // NOTE(review): for row-major, non-transposed operands the leading dimension is the
    // row length, which would be b.columns for B and c.columns for C rather than
    // a.rows / c.rows. With ldc = c.rows the routine strides past the end of the
    // a.rows x b.columns buffer whenever c.rows > c.columns - confirm against the
    // CBLAS reference; this is the likely cause of the reported crash in sgemm_beta.
    cblas_sgemm(CblasRowMajor, transA, transB, a.rows, b.columns, a.columns, 
            alpha, a.matrix, a.columns, b.matrix,a.rows, betta, c.matrix, c.rows);
    return c;
}

// Reproducer driver: builds a 2048x1320 matrix A and a 1320x64 matrix B, then
// multiplies them without transposition using alpha = 1 and beta = 0.
int main(){ 
  //test
    Matrix A=makeInputMatrix(2048,1320);
    Matrix B=makeInputMatrix(1320,64);
    //segFault for bigger sizes 
    Matrix C=multMatrixes(A,B,CblasNoTrans,CblasNoTrans,1.0f,0);

    // Cleanup was elided; the three buffers are not freed in this excerpt.
    //...freeing mem  
    ///...///
    return 0;
}
g++ -g -I/<PATH>/include  -L/<PATH>/lib   ob504.c++ -lopenblas
$ gdb a.exe
GNU gdb (GDB) 7.8.1
This GDB was configured as "x86_64-w64-mingw32".
Reading symbols from a.exe...done.
(gdb) r
Starting program: D:\devel\packages\tests\OpenBLAS\issue504\a.exe
[New Thread 4896.0x11a0]
[New Thread 4896.0x1128]
[New Thread 4896.0x13e4]
[New Thread 4896.0x1164]
[New Thread 4896.0x1104]
Start address for matrix C [0000000008FC28F0]

Program received signal SIGSEGV, Segmentation fault.
sgemm_beta_SANDYBRIDGE () at ../kernel/x86_64/gemm_beta.S:122
122             MOVSD   %xmm0, 0 * SIZE(C1)

@martin-frbg
Copy link
Collaborator

Possibly related to resolved issue 218.
Without blas_memory_alloc, you are almost certainly running out of heap space for the big matrices.
(Maybe you can work around this by setting OMP_STACKSIZE to some multi-gigabyte value, but it would not be pretty)
The "too many memory regions" error comes from blas_malloc'ing in too many threads - setting OMP_NUM_THREADS=1 should solve this. (Though others may provide better solutions in this thread)

@quickwritereader
Copy link
Contributor Author

Thanks, guys, for the help. Today we checked the sgemm function, and OpenBLAS worked well. The problem was in
our cblas_sgemm usage: the parameters were passed incorrectly.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants