Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a A @ B.dag() operation #2610

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/2610.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Specialized A @ B.dag() operation.
219 changes: 219 additions & 0 deletions qutip/core/data/matmul.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ cnp.import_array()
cdef extern from *:
void *PyMem_Calloc(size_t n, size_t elsize)


cdef extern from "<complex>" namespace "std" nogil:
double complex conj "conj"(double complex x)

# This function is templated over integral types on import to allow `idxint` to
# be any signed integer (though likely things will only work for >=32-bit). To
# change integral types, you only need to change the `idxint` definitions in
Expand All @@ -42,6 +46,10 @@ cdef extern from "src/matmul_csr_vector.hpp" nogil:
double complex *data, T *col_index, T *row_index,
double complex *vec, double complex scale, double complex *out,
T nrows)
void _matmul_dag_csr_vector[T](
double complex *data, T *col_index, T *row_index,
double complex *vec, double complex scale, double complex *out,
T nrows)

cdef extern from "src/matmul_diag_vector.hpp" nogil:
void _matmul_diag_vector[T](
Expand All @@ -56,6 +64,7 @@ __all__ = [
'matmul', 'matmul_csr', 'matmul_dense', 'matmul_dia',
'matmul_csr_dense_dense', 'matmul_dia_dense_dense', 'matmul_dense_dia_dense',
'multiply', 'multiply_csr', 'multiply_dense', 'multiply_dia',
'matmul_dag', 'matmul_dag_data', 'matmul_dag_dense', 'matmul_dag_dense_csr_dense',
]


Expand Down Expand Up @@ -518,6 +527,173 @@ cpdef Dense matmul_dense_dia_dense(Dense left, Dia right, double complex scale=1
return out


cpdef Dense matmul_dag_dense_csr_dense(
Dense left, CSR right,
double complex scale=1, Dense out=None
):
"""
Perform the operation
``out := scale * (left @ right) + out``
where `left`, `right` and `out` are matrices. `scale` is a complex scalar,
defaulting to 1.

If `out` is not given, it will be allocated as if it were a zero matrix.
"""
if left.shape[1] != right.shape[1]:
raise ValueError(
"incompatible matrix shapes "
+ str(left.shape)
+ " and "
+ str(right.shape)
)
if (
out is not None and (
out.shape[0] != left.shape[0]
or out.shape[1] != right.shape[0]
)
):
raise ValueError(
"incompatible output shape, got "
+ str(out.shape)
+ " but needed "
+ str((left.shape[0], right.shape[0]))
)
cdef Dense tmp = None
if out is None:
out = dense.zeros(left.shape[0], right.shape[0], left.fortran)
if bool(left.fortran) != bool(out.fortran):
msg = (
"out matrix is {}-ordered".format('Fortran' if out.fortran else 'C')
+ " but input is {}-ordered".format('Fortran' if left.fortran else 'C')
)
warnings.warn(msg, OrderEfficiencyWarning)
# Rather than making loads of copies of the same code, we just moan at
# the user and then transpose one of the arrays. We prefer to have
# `right` in Fortran-order for cache efficiency.
if left.fortran:
tmp = out
out = out.reorder()
else:
left = left.reorder()
cdef idxint row, col, ptr, idx_l, idx_out, out_row, idx_c
cdef idxint stride_in_col, stride_in_row, stride_out_row, stride_out_col
cdef idxint nrows=left.shape[0], ncols=right.shape[1]
cdef double complex val
stride_in_col = left.shape[0] if left.fortran else 1
stride_in_row = 1 if left.fortran else left.shape[1]
stride_out_col = out.shape[0] if out.fortran else 1
stride_out_row = 1 if out.fortran else out.shape[1]

# A @ B.dag = (B* @ A.T).T
if left.fortran:
for row in range(right.shape[0]):
for ptr in range(right.row_index[row], right.row_index[row + 1]):
val = scale * conj(right.data[ptr])
col = right.col_index[ptr]
for out_row in range(out.shape[0]):
idx_out = row * stride_out_col + out_row * stride_out_row
idx_l = col * stride_in_col + out_row * stride_in_row
out.data[idx_out] += val * left.data[idx_l]

else:
idx_c = idx_out = 0
for _ in range(nrows):
_matmul_dag_csr_vector(
right.data, right.col_index, right.row_index,
left.data + idx_c,
scale, out.data + idx_out,
right.shape[0]
)
idx_out += right.shape[0]
idx_c += left.shape[1]

if tmp is None:
return out
memcpy(tmp.data, out.data, ncols * nrows * sizeof(double complex))
return tmp


cpdef Dense matmul_dag_dense(
Dense left, Dense right,
double complex scale=1., Dense out=None
):
# blas support matmul for normal, transpose, adjoint for fortran ordered
# matrices.
if left.shape[1] != right.shape[1]:
raise ValueError(
"incompatible matrix shapes "
+ str(left.shape)
+ " and "
+ str(right.shape)
)
if (
out is not None and (
out.shape[0] != left.shape[0]
or out.shape[1] != right.shape[0]
)
):
raise ValueError(
"incompatible output shape, got "
+ str(out.shape)
+ " but needed "
+ str((left.shape[0], right.shape[0]))
)
cdef Dense a, b, out_add=None
cdef double complex alpha = 1., out_scale = 0.
cdef int m, n, k = left.shape[1], lda, ldb, ldc
cdef char left_code, right_code

if not right.fortran:
# Need a conjugate, we compute the transpose of the desired results.
# A.conj @ B^op -> (B^T^op @ A.dag)^T
if out is not None and out.fortran:
# out is not the right order, create an empty out and add it back.
out_add = out
out = dense.empty(left.shape[0], right.shape[0], False)
elif out is None:
out = dense.empty(left.shape[0], right.shape[0], False)
else:
out_scale = 1.
m = right.shape[0]
n = left.shape[0]
a, b = right, left
lda = right.shape[1]
ldb = left.shape[0] if left.fortran else left.shape[1]
ldc = right.shape[0]

left_code = b'C'
right_code = b'T' if left.fortran else b'N'
else:
if out is not None and not out.fortran:
out_add = out
out = dense.empty(left.shape[0], right.shape[0], True)
elif out is None:
out = dense.empty(left.shape[0], right.shape[0], True)
else:
out_scale = 1.

m = left.shape[0]
n = right.shape[0]
a, b = left, right
lda = left.shape[0] if left.fortran else left.shape[1]
ldb = right.shape[0]
ldc = left.shape[0]

left_code = b'N' if left.fortran else b'T'
right_code = b'C'

blas.zgemm(
&left_code, &right_code, &m, &n, &k,
&scale, a.data, &lda, b.data, &ldb,
&out_scale, out.data, &ldc
)

if out_add is not None:
out = iadd_dense(out, out_add)

return out


cpdef CSR multiply_csr(CSR left, CSR right):
"""Element-wise multiplication of CSR matrices."""
if left.shape[0] != right.shape[0] or left.shape[1] != right.shape[1]:
Expand Down Expand Up @@ -732,6 +908,49 @@ multiply.add_specialisations([
], _defer=True)


cpdef Data matmul_dag_data(
Data left, Data right,
double complex scale=1, Dense out=None
):
return matmul(left, right.adjoint(), scale, out)


matmul_dag = _Dispatcher(
_inspect.Signature([
_inspect.Parameter('left', _inspect.Parameter.POSITIONAL_ONLY),
_inspect.Parameter('right', _inspect.Parameter.POSITIONAL_ONLY),
_inspect.Parameter('scale', _inspect.Parameter.POSITIONAL_OR_KEYWORD,
default=1),
]),
name='matmul_dag',
module=__name__,
inputs=('left', 'right'),
out=True,
)
matmul_dag.__doc__ =\
"""
Compute the matrix multiplication of two matrices, with the operation
scale * (left @ right.dag)
where `scale` is (optionally) a scalar, and `left` and `right` are
matrices.

Parameters
----------
left : Data
The left operand as either a bra or a ket matrix.

right : Data
The right operand as a ket matrix.

scale : complex, optional
The scalar to multiply the output by.
"""
matmul_dag.add_specialisations([
(Dense, CSR, Dense, matmul_dag_dense_csr_dense),
(Dense, Dense, Dense, matmul_dag_dense),
(Data, Data, Data, matmul_dag_data),
], _defer=True)

del _inspect, _Dispatcher


Expand Down
98 changes: 97 additions & 1 deletion qutip/core/data/src/matmul_csr_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

#include "matmul_csr_vector.hpp"

template <typename IntT>
#if \
(defined(__GNUC__) && defined(__SSE3__))\
|| (defined(_MSC_VER) && defined(__AVX__))
/* Manually apply the vectorisation. */
template <typename IntT>
void _matmul_csr_vector(
const std::complex<double> * _RESTRICT data,
const IntT * _RESTRICT col_index,
Expand Down Expand Up @@ -52,8 +52,53 @@ void _matmul_csr_vector(
_mm_storeu_pd((double *)&out[row], num3);
}
}
/*
// TODO: Fix the SIMD version of _matmul_dag_csr_vector
template <typename IntT>
void _matmul_dag_csr_vector(
const std::complex<double> * _RESTRICT data,
const IntT * _RESTRICT col_index,
const IntT * _RESTRICT row_index,
const std::complex<double> * _RESTRICT vec,
const std::complex<double> scale,
std::complex<double> * _RESTRICT out,
const IntT nrows)
{
IntT row_start, row_end;
__m128d num1, num2, num3, num4, num5;
for (IntT row=0; row < nrows; row++) {
num4 = _mm_setzero_pd();
num5 = _mm_setzero_pd();
row_start = row_index[row];
row_end = row_index[row+1];
for (IntT ptr=row_start; ptr < row_end; ptr++) {
num1 = _mm_loaddup_pd(&reinterpret_cast<const double(&)[2]>(data[ptr])[0]);
num2 = _mm_set_pd(std::imag(vec[col_index[ptr]]),
std::real(vec[col_index[ptr]]));
num3 = _mm_mul_pd(num2, num1);
num4 = _mm_add_pd(num3, num4);
num1 = _mm_loaddup_pd(&reinterpret_cast<const double(&)[2]>(data[ptr])[1]);
num2 = _mm_shuffle_pd(num2, num2, 1);
num2 = _mm_mul_pd(num2, num1);
num5 = _mm_addsub_pd(num5, num2);
}
num4 = _mm_sub_pd(num3, num5);
num1 = _mm_loaddup_pd(&reinterpret_cast<const double(&)[2]>(scale)[0]);
num3 = _mm_mul_pd(num4, num1);
num1 = _mm_loaddup_pd(&reinterpret_cast<const double(&)[2]>(scale)[1]);
num4 = _mm_shuffle_pd(num4, num4, 1);
num4 = _mm_mul_pd(num4, num1);
num3 = _mm_addsub_pd(num3, num4);
num2 = _mm_loadu_pd((double *)&out[row]);
num3 = _mm_add_pd(num2, num3);
// _mm_storeu_pd((double *)&out[row], num3);
_mm_storeu_pd((double *)&out[row], num1);
}
}
*/
#else
/* No manual vectorisation. */
template <typename IntT>
void _matmul_csr_vector(
const std::complex<double> * _RESTRICT data,
const IntT * _RESTRICT col_index,
Expand All @@ -79,6 +124,32 @@ void _matmul_csr_vector(
}
#endif

template <typename IntT>
void _matmul_dag_csr_vector(
const std::complex<double> * _RESTRICT data,
const IntT * _RESTRICT col_index,
const IntT * _RESTRICT row_index,
const std::complex<double> * _RESTRICT vec,
const std::complex<double> scale,
std::complex<double> * _RESTRICT out,
const IntT nrows)
{
IntT row_start, row_end;
std::complex<double> dot;
for (size_t row=0; row < nrows; row++)
{
dot = 0;
row_start = row_index[row];
row_end = row_index[row+1];
for (size_t ptr=row_start; ptr < row_end; ptr++)
{
dot += std::conj(data[ptr])*vec[col_index[ptr]];
}
out[row] += scale * dot;
}
}


/* It seems wrong to me to specify the integer specialisations as `int`, `long` and
* `long long` rather than just `int32_t` and `int64_t`, but for some reason the
* latter causes compatibility issues with defining the sized types with the
Expand Down Expand Up @@ -114,3 +185,28 @@ template void _matmul_csr_vector<>(
const std::complex<double>,
std::complex<double> * _RESTRICT,
const long long);

template void _matmul_dag_csr_vector<>(
const std::complex<double> * _RESTRICT,
const int * _RESTRICT,
const int * _RESTRICT,
const std::complex<double> * _RESTRICT,
const std::complex<double>,
std::complex<double> * _RESTRICT,
const int);
template void _matmul_dag_csr_vector<>(
const std::complex<double> * _RESTRICT,
const long * _RESTRICT,
const long * _RESTRICT,
const std::complex<double> * _RESTRICT,
const std::complex<double>,
std::complex<double> * _RESTRICT,
const long);
template void _matmul_dag_csr_vector<>(
const std::complex<double> * _RESTRICT,
const long long * _RESTRICT,
const long long * _RESTRICT,
const std::complex<double> * _RESTRICT,
const std::complex<double>,
std::complex<double> * _RESTRICT,
const long long);
10 changes: 10 additions & 0 deletions qutip/core/data/src/matmul_csr_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,13 @@ void _matmul_csr_vector(
const std::complex<double> scale,
std::complex<double> * _RESTRICT out,
const IntT nrows);

template <typename IntT>
void _matmul_dag_csr_vector(
const std::complex<double> * _RESTRICT data,
const IntT * _RESTRICT col_index,
const IntT * _RESTRICT row_index,
const std::complex<double> * _RESTRICT vec,
const std::complex<double> scale,
std::complex<double> * _RESTRICT out,
const IntT nrows);
Loading
Loading