Skip to content

Commit

Permalink
updated to most recent viennacl-dev code
Browse files Browse the repository at this point in the history
  • Loading branch information
Determan authored and Determan committed Oct 12, 2017
1 parent 5b6eb89 commit f3a5ddc
Show file tree
Hide file tree
Showing 39 changed files with 2,121 additions and 1,835 deletions.
4 changes: 2 additions & 2 deletions inst/include/viennacl/backend/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ namespace backend
case CUDA_MEMORY:
buffer.resize(handle.raw_size() / sizeof(DataType));
opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
handle.cuda_handle() = cuda::memory_create(handle.raw_size(), &(buffer[0]));
break;
#endif
default:
Expand All @@ -449,7 +449,7 @@ namespace backend
case OPENCL_MEMORY:
buffer.resize(handle.raw_size() / sizeof(DataType));
cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
handle.opencl_handle() = opencl::memory_create(new_ctx.opencl_context(), handle.raw_size(), &(buffer[0]));
break;
#endif
default:
Expand Down
102 changes: 93 additions & 9 deletions inst/include/viennacl/compressed_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,59 @@ namespace detail
// host to device:
//


/** @brief Copies a sparse matrix in CSR-format (three arrays) from the host to the device (either GPU or multi-core CPU)
*
* @param csr_rows The array containing the start and stop indices in the column and element array for each row (length rows+1)
* @param csr_cols The array containing the column indices
* @param csr_elements The nonzero entries
* @param num_rows The number of rows in the CSR matrix
* @param num_cols The number of columns in the CSR matrix
* @param num_nnz The number of nonzers in the CSR matrix
* @param gpu_matrix A compressed_matrix from ViennaCL
*/
template<typename IndexT, typename NumericT, unsigned int AlignmentV>
void copy(const IndexT *csr_rows,
const IndexT *csr_cols,
const NumericT *csr_elements,
vcl_size_t num_rows,
vcl_size_t num_cols,
vcl_size_t num_nnz,
compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
{
if ( num_rows > 0 && num_cols > 0 && num_nnz > 0)
{
viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), num_rows + 1);

if (sizeof(IndexT) != row_buffer.element_size()) // check whether indices are of the same length (same number of bits)
{
viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), num_nnz);

for (vcl_size_t i=0; i<=num_rows; ++i)
row_buffer.set(i, csr_rows[i]);
for (vcl_size_t i=0; i<num_nnz; ++i)
col_buffer.set(i, csr_cols[i]);

gpu_matrix.set(row_buffer.get(),
col_buffer.get(),
csr_elements,
num_rows,
num_cols,
num_nnz);
}
else
{
gpu_matrix.set(static_cast<const void*>(csr_rows),
static_cast<const void*>(csr_cols),
csr_elements,
num_rows,
num_cols,
num_nnz);
}
}
}


//provide copy-operation:
/** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU)
*
Expand Down Expand Up @@ -177,10 +230,19 @@ void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA,
assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

//we just need to copy the CSR arrays:
viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1);
for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i)
row_buffer.set(i, ublas_matrix.index1_data()[i]);

typedef typename boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA>::const_iterator1 iterator1_t;
typedef typename boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA>::const_iterator2 iterator2_t;

unsigned int r = 0;
row_buffer.set(0, 0);
for (iterator1_t it1 = ublas_matrix.begin1(); it1 != ublas_matrix.end1(); it1++)
{
for (iterator2_t it2 = it1.begin(); it2 != it1.end(); it2++)
++r;
row_buffer.set(it1.index1() + 1, r);
}

viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz());
for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
Expand Down Expand Up @@ -559,6 +621,7 @@ void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
template<class NumericT, unsigned int AlignmentV /* see VCLForwards.h */>
class compressed_matrix
{
typedef compressed_matrix<NumericT, AlignmentV> self_type;
public:
typedef viennacl::backend::mem_handle handle_type;
typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type;
Expand Down Expand Up @@ -769,6 +832,33 @@ class compressed_matrix
generate_row_block_information();
}


/** @brief Copy constructor: takes sizes and row-block count from 'other', creates buffers of matching size in the context of 'other', then copies via the assignment operator. */
compressed_matrix(compressed_matrix const & other) :
  rows_(other.size1()), cols_(other.size2()), nonzeros_(other.nnz()), row_block_num_(other.row_block_num_)
{
  typedef viennacl::backend::typesafe_host_array<unsigned int>  index_array_type;

  viennacl::context const & source_ctx = viennacl::traits::context(other);

  // All four handles are switched to the memory type reported by the context of 'other'.
  row_buffer_.switch_active_handle_id(source_ctx.memory_type());
  col_buffer_.switch_active_handle_id(source_ctx.memory_type());
  elements_.switch_active_handle_id(source_ctx.memory_type());
  row_blocks_.switch_active_handle_id(source_ctx.memory_type());

  // Row array: one index per row plus one trailing entry.
  if (rows_ > 0)
    viennacl::backend::memory_create(row_buffer_, index_array_type().element_size() * (rows_ + 1), source_ctx);

  // Column indices and values: one entry per nonzero.
  if (nonzeros_ > 0)
  {
    viennacl::backend::memory_create(col_buffer_, index_array_type().element_size() * nonzeros_, source_ctx);
    viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros_, source_ctx);
  }

  // Row-block array: row_block_num_ + 1 indices.
  if (row_block_num_ > 0)
    viennacl::backend::memory_create(row_blocks_, index_array_type().element_size() * (row_block_num_ + 1), source_ctx);

  // The actual data transfer is delegated to the assignment operator.
  *this = other;
}


/** @brief Assigns a compressed matrix from possibly another memory domain. */
compressed_matrix & operator=(compressed_matrix const & other)
{
Expand Down Expand Up @@ -1087,12 +1177,6 @@ class compressed_matrix

}

private:
// /** @brief Copy constructor is by now not available. */
//compressed_matrix(compressed_matrix const &);

private:

vcl_size_t rows_;
vcl_size_t cols_;
vcl_size_t nonzeros_;
Expand Down
4 changes: 2 additions & 2 deletions inst/include/viennacl/device_specific/forwards.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ inline const char * expression_type_to_string(expression_type type)
}

/** @brief generate the string for a value kernel argument */
static std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name)
inline std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name)
{
  // Builds "<scalartype> <name>," step by step.
  std::string argument(scalartype);
  argument += ' ';
  argument += name;
  argument += ",";
  return argument;
}

/** @brief generate the string for a pointer kernel argument */
static std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name)
inline std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name)
{
  // Builds "<address_space> <scalartype>* <name>," step by step.
  std::string argument(address_space);
  argument.append(" ");
  argument.append(scalartype);
  argument.append("* ");
  argument.append(name);
  argument.append(",");
  return argument;
}
Expand Down
12 changes: 12 additions & 0 deletions inst/include/viennacl/device_specific/tree_parsing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,21 +128,33 @@ inline const char * evaluate(scheduler::operation_node_type type)
//Function
case OPERATION_UNARY_ABS_TYPE : return "abs";
case OPERATION_UNARY_ACOS_TYPE : return "acos";
case OPERATION_UNARY_ACOSH_TYPE : return "acosh";
case OPERATION_UNARY_ASIN_TYPE : return "asin";
case OPERATION_UNARY_ASINH_TYPE : return "asinh";
case OPERATION_UNARY_ATAN_TYPE : return "atan";
case OPERATION_UNARY_ATANH_TYPE : return "atanh";
case OPERATION_UNARY_CEIL_TYPE : return "ceil";
case OPERATION_UNARY_COS_TYPE : return "cos";
case OPERATION_UNARY_COSH_TYPE : return "cosh";
case OPERATION_UNARY_ERF_TYPE : return "erf";
case OPERATION_UNARY_ERFC_TYPE : return "erfc";
case OPERATION_UNARY_EXP_TYPE : return "exp";
case OPERATION_UNARY_EXP10_TYPE : return "exp10";
case OPERATION_UNARY_EXP2_TYPE : return "exp2";
case OPERATION_UNARY_FABS_TYPE : return "fabs";
case OPERATION_UNARY_FLOOR_TYPE : return "floor";
case OPERATION_UNARY_LOG_TYPE : return "log";
case OPERATION_UNARY_LOG2_TYPE : return "log2";
case OPERATION_UNARY_LOG10_TYPE : return "log10";
case OPERATION_UNARY_ROUND_TYPE : return "round";
case OPERATION_UNARY_RSQRT_TYPE : return "rsqrt";
case OPERATION_UNARY_SIGN_TYPE : return "sign";
case OPERATION_UNARY_SIN_TYPE : return "sin";
case OPERATION_UNARY_SINH_TYPE : return "sinh";
case OPERATION_UNARY_SQRT_TYPE : return "sqrt";
case OPERATION_UNARY_TAN_TYPE : return "tan";
case OPERATION_UNARY_TANH_TYPE : return "tanh";
case OPERATION_UNARY_TRUNC_TYPE : return "trunc";

case OPERATION_UNARY_CAST_CHAR_TYPE : return "(char)";
case OPERATION_UNARY_CAST_UCHAR_TYPE : return "(uchar)";
Expand Down
11 changes: 11 additions & 0 deletions inst/include/viennacl/device_specific/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,21 +493,32 @@ inline bool elementwise_function(scheduler::op_element const & op)

|| op.type== OPERATION_UNARY_ABS_TYPE
|| op.type== OPERATION_UNARY_ACOS_TYPE
|| op.type== OPERATION_UNARY_ACOSH_TYPE
|| op.type== OPERATION_UNARY_ASIN_TYPE
|| op.type== OPERATION_UNARY_ASINH_TYPE
|| op.type== OPERATION_UNARY_ATAN_TYPE
|| op.type== OPERATION_UNARY_ATANH_TYPE
|| op.type== OPERATION_UNARY_CEIL_TYPE
|| op.type== OPERATION_UNARY_COS_TYPE
|| op.type== OPERATION_UNARY_COSH_TYPE
|| op.type== OPERATION_UNARY_ERF_TYPE
|| op.type== OPERATION_UNARY_ERFC_TYPE
|| op.type== OPERATION_UNARY_EXP_TYPE
|| op.type== OPERATION_UNARY_EXP2_TYPE
|| op.type== OPERATION_UNARY_EXP10_TYPE
|| op.type== OPERATION_UNARY_FABS_TYPE
|| op.type== OPERATION_UNARY_FLOOR_TYPE
|| op.type== OPERATION_UNARY_LOG_TYPE
|| op.type== OPERATION_UNARY_LOG2_TYPE
|| op.type== OPERATION_UNARY_LOG10_TYPE
|| op.type== OPERATION_UNARY_ROUND_TYPE
|| op.type== OPERATION_UNARY_RSQRT_TYPE
|| op.type== OPERATION_UNARY_SIN_TYPE
|| op.type== OPERATION_UNARY_SINH_TYPE
|| op.type== OPERATION_UNARY_SQRT_TYPE
|| op.type== OPERATION_UNARY_TAN_TYPE
|| op.type== OPERATION_UNARY_TANH_TYPE
|| op.type== OPERATION_UNARY_TRUNC_TYPE

|| op.type== OPERATION_BINARY_ELEMENT_POW_TYPE
|| op.type== OPERATION_BINARY_ELEMENT_EQ_TYPE
Expand Down
24 changes: 24 additions & 0 deletions inst/include/viennacl/forwards.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,20 @@ namespace viennacl
struct op_abs {};
/** @brief A tag class representing the acos() function */
struct op_acos {};
/** @brief A tag class representing the acosh() function */
struct op_acosh {};
/** @brief A tag class representing the asin() function */
struct op_asin {};
/** @brief A tag class representing the asinh() function */
struct op_asinh {};
/** @brief A tag class for representing the argmax() function */
struct op_argmax {};
/** @brief A tag class for representing the argmin() function */
struct op_argmin {};
/** @brief A tag class representing the atan() function */
struct op_atan {};
/** @brief A tag class representing the atanh() function */
struct op_atanh {};
/** @brief A tag class representing the atan2() function */
struct op_atan2 {};
/** @brief A tag class representing the ceil() function */
Expand All @@ -154,8 +160,16 @@ namespace viennacl
struct op_cos {};
/** @brief A tag class representing the cosh() function */
struct op_cosh {};
/** @brief A tag class representing the erf() function */
struct op_erf {};
/** @brief A tag class representing the erfc() function */
struct op_erfc {};
/** @brief A tag class representing the exp() function */
struct op_exp {};
/** @brief A tag class representing the exp2() function */
struct op_exp2 {};
/** @brief A tag class representing the exp10() function */
struct op_exp10 {};
/** @brief A tag class representing the fabs() function */
struct op_fabs {};
/** @brief A tag class representing the fdim() function */
Expand All @@ -170,8 +184,16 @@ namespace viennacl
struct op_fmod {};
/** @brief A tag class representing the log() function */
struct op_log {};
/** @brief A tag class representing the log2() function */
struct op_log2 {};
/** @brief A tag class representing the log10() function */
struct op_log10 {};
/** @brief A tag class representing the round() function */
struct op_round {};
/** @brief A tag class representing the rsqrt() function */
struct op_rsqrt {};
/** @brief A tag class representing the sign() function */
struct op_sign {};
/** @brief A tag class representing the sin() function */
struct op_sin {};
/** @brief A tag class representing the sinh() function */
Expand All @@ -182,6 +204,8 @@ namespace viennacl
struct op_tan {};
/** @brief A tag class representing the tanh() function */
struct op_tanh {};
/** @brief A tag class representing the trunc() function */
struct op_trunc {};

/** @brief A tag class representing the (off-)diagonal of a matrix */
struct op_matrix_diag {};
Expand Down
2 changes: 1 addition & 1 deletion inst/include/viennacl/linalg/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ namespace detail
CPU_NumericType norm_rhs_squared = ip_rr;
CPU_NumericType new_ipp_rr_over_norm_rhs;

if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
if (std::fabs(norm_rhs_squared) <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
return result;

for (unsigned int i = 0; i < tag.max_iterations(); ++i)
Expand Down
51 changes: 51 additions & 0 deletions inst/include/viennacl/linalg/cuda/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,57 @@ typename viennacl::enable_if< viennacl::is_cpu_scalar<ArgT>::value,
arg_reference(ArgT, double const & val) { return val; }

} //namespace detail

/** @brief Maps an operation tag type (viennacl::op_*) to a device function.
*
* The primary template has no members; each specialization below provides a
* static __device__ member template apply(x) that evaluates the operation for a single value.
*/
template<typename OpT>
struct cuda_unary_op {};

// Generates the specialization cuda_unary_op<viennacl::op_FUNCNAME>.
// Its apply(x) makes an unqualified call FUNCNAME(x) and returns the result as NumericT.
#define VIENNACL_CUDA_GENERATE_UNARY_OP(FUNCNAME) \
template<>\
struct cuda_unary_op<viennacl::op_##FUNCNAME> \
{ \
template<typename NumericT> \
static __device__ NumericT apply(NumericT x) { return FUNCNAME(x); } \
};

// One specialization per unary function; the macro argument is both the tag suffix and the function called.
VIENNACL_CUDA_GENERATE_UNARY_OP(abs)
VIENNACL_CUDA_GENERATE_UNARY_OP(acos)
VIENNACL_CUDA_GENERATE_UNARY_OP(acosh)
VIENNACL_CUDA_GENERATE_UNARY_OP(asin)
VIENNACL_CUDA_GENERATE_UNARY_OP(asinh)
VIENNACL_CUDA_GENERATE_UNARY_OP(atan)
VIENNACL_CUDA_GENERATE_UNARY_OP(atanh)
VIENNACL_CUDA_GENERATE_UNARY_OP(ceil)
VIENNACL_CUDA_GENERATE_UNARY_OP(cos)
VIENNACL_CUDA_GENERATE_UNARY_OP(cosh)
VIENNACL_CUDA_GENERATE_UNARY_OP(erf)
VIENNACL_CUDA_GENERATE_UNARY_OP(erfc)
VIENNACL_CUDA_GENERATE_UNARY_OP(exp)
VIENNACL_CUDA_GENERATE_UNARY_OP(exp2)
VIENNACL_CUDA_GENERATE_UNARY_OP(exp10)
VIENNACL_CUDA_GENERATE_UNARY_OP(fabs)
VIENNACL_CUDA_GENERATE_UNARY_OP(floor)
VIENNACL_CUDA_GENERATE_UNARY_OP(log)
VIENNACL_CUDA_GENERATE_UNARY_OP(log2)
VIENNACL_CUDA_GENERATE_UNARY_OP(log10)
VIENNACL_CUDA_GENERATE_UNARY_OP(round)
VIENNACL_CUDA_GENERATE_UNARY_OP(rsqrt)
//VIENNACL_CUDA_GENERATE_UNARY_OP(sign) //implement manually below
VIENNACL_CUDA_GENERATE_UNARY_OP(sin)
VIENNACL_CUDA_GENERATE_UNARY_OP(sinh)
VIENNACL_CUDA_GENERATE_UNARY_OP(sqrt)
VIENNACL_CUDA_GENERATE_UNARY_OP(tan)
VIENNACL_CUDA_GENERATE_UNARY_OP(tanh)
VIENNACL_CUDA_GENERATE_UNARY_OP(trunc)

/** @brief Hand-written specialization for the sign operation (not generated by the macro above).
*
* apply(x) returns NumericT(1) if x > 0, NumericT(-1) if x < 0, and NumericT(0) whenever neither comparison holds.
*/
template<>
struct cuda_unary_op<viennacl::op_sign>
{
template<typename NumericT>
static __device__ NumericT apply(NumericT x) { return (x > NumericT(0)) ? NumericT(1) : (x < NumericT(0) ? NumericT(-1) : NumericT(0)); }
};

// The generator macro is removed again so that it is not visible past this point.
#undef VIENNACL_CUDA_GENERATE_UNARY_OP

} //namespace cuda
} //namespace linalg
} //namespace viennacl
Expand Down
Loading

0 comments on commit f3a5ddc

Please sign in to comment.