updated to most recent viennacl-dev code

cdeterman · Oct 12, 2017 · f3a5ddc · f3a5ddc
1 parent 5b6eb89
commit f3a5ddc
Show file tree

Hide file tree

Showing 39 changed files with 2,121 additions and 1,835 deletions.
diff --git a/inst/include/viennacl/backend/memory.hpp b/inst/include/viennacl/backend/memory.hpp
@@ -425,7 +425,7 @@ namespace backend
         case CUDA_MEMORY:
           buffer.resize(handle.raw_size() / sizeof(DataType));
           opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
-          cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
+          handle.cuda_handle() = cuda::memory_create(handle.raw_size(), &(buffer[0]));
           break;
 #endif
         default:
@@ -449,7 +449,7 @@ namespace backend
         case OPENCL_MEMORY:
           buffer.resize(handle.raw_size() / sizeof(DataType));
           cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
-          handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
+          handle.opencl_handle() = opencl::memory_create(new_ctx.opencl_context(), handle.raw_size(), &(buffer[0]));
           break;
 #endif
         default:

diff --git a/inst/include/viennacl/compressed_matrix.hpp b/inst/include/viennacl/compressed_matrix.hpp
@@ -94,6 +94,59 @@ namespace detail
 // host to device:
 //
 
+
+/** @brief Copies a sparse matrix in CSR-format (three arrays) from the host to the device (either GPU or multi-core CPU)
+  *
+  * @param csr_rows     The array containing the start and stop indices in the column and element array for each row (length rows+1)
+  * @param csr_cols     The array containing the column indices
+  * @param csr_elements The nonzero entries
+  * @param num_rows     The number of rows in the CSR matrix
+  * @param num_cols     The number of columns in the CSR matrix
+  * @param num_nnz      The number of nonzeros in the CSR matrix
+  * @param gpu_matrix   A compressed_matrix from ViennaCL
+  */
+template<typename IndexT, typename NumericT, unsigned int AlignmentV>
+void copy(const IndexT   *csr_rows,
+          const IndexT   *csr_cols,
+          const NumericT *csr_elements,
+          vcl_size_t num_rows,
+          vcl_size_t num_cols,
+          vcl_size_t num_nnz,
+          compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
+{
+  if ( num_rows > 0 && num_cols > 0 && num_nnz > 0)
+  {
+    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), num_rows + 1);
+
+    if (sizeof(IndexT) != row_buffer.element_size()) // check whether indices are of the same length (same number of bits)
+    {
+      viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), num_nnz);
+
+      for (vcl_size_t i=0; i<=num_rows; ++i)
+        row_buffer.set(i, csr_rows[i]);
+      for (vcl_size_t i=0; i<num_nnz; ++i)
+        col_buffer.set(i, csr_cols[i]);
+
+      gpu_matrix.set(row_buffer.get(),
+                     col_buffer.get(),
+                     csr_elements,
+                     num_rows,
+                     num_cols,
+                     num_nnz);
+    }
+    else
+    {
+      gpu_matrix.set(static_cast<const void*>(csr_rows),
+                     static_cast<const void*>(csr_cols),
+                     csr_elements,
+                     num_rows,
+                     num_cols,
+                     num_nnz);
+    }
+  }
+}
+
+
 //provide copy-operation:
 /** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU)
   *
@@ -177,10 +230,19 @@ void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA,
   assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
   assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
 
-  //we just need to copy the CSR arrays:
   viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1);
-  for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i)
-    row_buffer.set(i, ublas_matrix.index1_data()[i]);
+
+  typedef typename boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA>::const_iterator1 iterator1_t;
+  typedef typename boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA>::const_iterator2 iterator2_t;
+
+  unsigned int r = 0;
+  row_buffer.set(0, 0);
+  for (iterator1_t it1 = ublas_matrix.begin1(); it1 != ublas_matrix.end1(); it1++)
+  {
+    for (iterator2_t it2 = it1.begin(); it2 != it1.end(); it2++)
+      ++r;
+    row_buffer.set(it1.index1() + 1, r);
+  }
 
   viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz());
   for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
@@ -559,6 +621,7 @@ void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
 template<class NumericT, unsigned int AlignmentV /* see VCLForwards.h */>
 class compressed_matrix
 {
+  typedef compressed_matrix<NumericT, AlignmentV>                                                  self_type;
 public:
   typedef viennacl::backend::mem_handle                                                              handle_type;
   typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>   value_type;
@@ -769,6 +832,33 @@ class compressed_matrix
     generate_row_block_information();
   }
 
+
+  compressed_matrix(compressed_matrix const & other) :
+    rows_(other.size1()), cols_(other.size2()), nonzeros_(other.nnz()), row_block_num_(other.row_block_num_)
+  {
+    viennacl::context const & ctx = viennacl::traits::context(other);
+
+    row_buffer_.switch_active_handle_id(ctx.memory_type());
+    col_buffer_.switch_active_handle_id(ctx.memory_type());
+    elements_.switch_active_handle_id(ctx.memory_type());
+    row_blocks_.switch_active_handle_id(ctx.memory_type());
+
+    if (rows_ > 0)
+    {
+      viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows_ + 1), ctx);
+    }
+    if (nonzeros_ > 0)
+    {
+      viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros_, ctx);
+      viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros_, ctx);
+    }
+    if (row_block_num_ > 0)
+      viennacl::backend::memory_create(row_blocks_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (row_block_num_ + 1), ctx);
+
+    self_type::operator=(other);
+  }
+
+
   /** @brief Assigns a compressed matrix, possibly from another memory domain. */
   compressed_matrix & operator=(compressed_matrix const & other)
   {
@@ -1087,12 +1177,6 @@ class compressed_matrix
 
   }
 
-private:
-  // /** @brief Copy constructor is by now not available. */
-  //compressed_matrix(compressed_matrix const &);
-
-private:
-
   vcl_size_t rows_;
   vcl_size_t cols_;
   vcl_size_t nonzeros_;

diff --git a/inst/include/viennacl/device_specific/forwards.h b/inst/include/viennacl/device_specific/forwards.h
@@ -127,13 +127,13 @@ inline const char * expression_type_to_string(expression_type type)
 }
 
 /** @brief generate the string for a value kernel argument */
-static std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name)
+inline std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name)
 {
   return scalartype + ' ' + name + ",";
 }
 
 /** @brief generate the string for a pointer kernel argument */
-static std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name)
+inline std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name)
 {
   return address_space +  " " + scalartype + "* " + name + ",";
 }

diff --git a/inst/include/viennacl/device_specific/tree_parsing.hpp b/inst/include/viennacl/device_specific/tree_parsing.hpp
@@ -128,21 +128,33 @@ inline const char * evaluate(scheduler::operation_node_type type)
   //Function
   case OPERATION_UNARY_ABS_TYPE : return "abs";
   case OPERATION_UNARY_ACOS_TYPE : return "acos";
+  case OPERATION_UNARY_ACOSH_TYPE : return "acosh";
   case OPERATION_UNARY_ASIN_TYPE : return "asin";
+  case OPERATION_UNARY_ASINH_TYPE : return "asinh";
   case OPERATION_UNARY_ATAN_TYPE : return "atan";
+  case OPERATION_UNARY_ATANH_TYPE : return "atanh";
   case OPERATION_UNARY_CEIL_TYPE : return "ceil";
   case OPERATION_UNARY_COS_TYPE : return "cos";
   case OPERATION_UNARY_COSH_TYPE : return "cosh";
+  case OPERATION_UNARY_ERF_TYPE : return "erf";
+  case OPERATION_UNARY_ERFC_TYPE : return "erfc";
   case OPERATION_UNARY_EXP_TYPE : return "exp";
+  case OPERATION_UNARY_EXP10_TYPE : return "exp10";
+  case OPERATION_UNARY_EXP2_TYPE : return "exp2";
   case OPERATION_UNARY_FABS_TYPE : return "fabs";
   case OPERATION_UNARY_FLOOR_TYPE : return "floor";
   case OPERATION_UNARY_LOG_TYPE : return "log";
+  case OPERATION_UNARY_LOG2_TYPE : return "log2";
   case OPERATION_UNARY_LOG10_TYPE : return "log10";
+  case OPERATION_UNARY_ROUND_TYPE : return "round";
+  case OPERATION_UNARY_RSQRT_TYPE : return "rsqrt";
+  case OPERATION_UNARY_SIGN_TYPE : return "sign";
   case OPERATION_UNARY_SIN_TYPE : return "sin";
   case OPERATION_UNARY_SINH_TYPE : return "sinh";
   case OPERATION_UNARY_SQRT_TYPE : return "sqrt";
   case OPERATION_UNARY_TAN_TYPE : return "tan";
   case OPERATION_UNARY_TANH_TYPE : return "tanh";
+  case OPERATION_UNARY_TRUNC_TYPE : return "trunc";
 
   case OPERATION_UNARY_CAST_CHAR_TYPE : return "(char)";
   case OPERATION_UNARY_CAST_UCHAR_TYPE : return "(uchar)";

diff --git a/inst/include/viennacl/device_specific/utils.hpp b/inst/include/viennacl/device_specific/utils.hpp
@@ -493,21 +493,32 @@ inline bool elementwise_function(scheduler::op_element const & op)
 
       || op.type== OPERATION_UNARY_ABS_TYPE
       || op.type== OPERATION_UNARY_ACOS_TYPE
+      || op.type== OPERATION_UNARY_ACOSH_TYPE
       || op.type== OPERATION_UNARY_ASIN_TYPE
+      || op.type== OPERATION_UNARY_ASINH_TYPE
       || op.type== OPERATION_UNARY_ATAN_TYPE
+      || op.type== OPERATION_UNARY_ATANH_TYPE
       || op.type== OPERATION_UNARY_CEIL_TYPE
       || op.type== OPERATION_UNARY_COS_TYPE
       || op.type== OPERATION_UNARY_COSH_TYPE
+      || op.type== OPERATION_UNARY_ERF_TYPE
+      || op.type== OPERATION_UNARY_ERFC_TYPE
       || op.type== OPERATION_UNARY_EXP_TYPE
+      || op.type== OPERATION_UNARY_EXP2_TYPE
+      || op.type== OPERATION_UNARY_EXP10_TYPE
       || op.type== OPERATION_UNARY_FABS_TYPE
       || op.type== OPERATION_UNARY_FLOOR_TYPE
       || op.type== OPERATION_UNARY_LOG_TYPE
+      || op.type== OPERATION_UNARY_LOG2_TYPE
       || op.type== OPERATION_UNARY_LOG10_TYPE
+      || op.type== OPERATION_UNARY_ROUND_TYPE
+      || op.type== OPERATION_UNARY_RSQRT_TYPE
       || op.type== OPERATION_UNARY_SIN_TYPE
       || op.type== OPERATION_UNARY_SINH_TYPE
       || op.type== OPERATION_UNARY_SQRT_TYPE
       || op.type== OPERATION_UNARY_TAN_TYPE
       || op.type== OPERATION_UNARY_TANH_TYPE
+      || op.type== OPERATION_UNARY_TRUNC_TYPE
 
       || op.type== OPERATION_BINARY_ELEMENT_POW_TYPE
       || op.type== OPERATION_BINARY_ELEMENT_EQ_TYPE

diff --git a/inst/include/viennacl/forwards.h b/inst/include/viennacl/forwards.h
@@ -138,14 +138,20 @@ namespace viennacl
   struct op_abs {};
   /** @brief A tag class representing the acos() function */
   struct op_acos {};
+  /** @brief A tag class representing the acosh() function */
+  struct op_acosh {};
   /** @brief A tag class representing the asin() function */
   struct op_asin {};
+  /** @brief A tag class representing the asinh() function */
+  struct op_asinh {};
   /** @brief A tag class for representing the argmax() function */
   struct op_argmax {};
   /** @brief A tag class for representing the argmin() function */
   struct op_argmin {};
   /** @brief A tag class representing the atan() function */
   struct op_atan {};
+  /** @brief A tag class representing the atanh() function */
+  struct op_atanh {};
   /** @brief A tag class representing the atan2() function */
   struct op_atan2 {};
   /** @brief A tag class representing the ceil() function */
@@ -154,8 +160,16 @@ namespace viennacl
   struct op_cos {};
   /** @brief A tag class representing the cosh() function */
   struct op_cosh {};
+  /** @brief A tag class representing the erf() function */
+  struct op_erf {};
+  /** @brief A tag class representing the erfc() function */
+  struct op_erfc {};
   /** @brief A tag class representing the exp() function */
   struct op_exp {};
+  /** @brief A tag class representing the exp2() function */
+  struct op_exp2 {};
+  /** @brief A tag class representing the exp10() function */
+  struct op_exp10 {};
   /** @brief A tag class representing the fabs() function */
   struct op_fabs {};
   /** @brief A tag class representing the fdim() function */
@@ -170,8 +184,16 @@ namespace viennacl
   struct op_fmod {};
   /** @brief A tag class representing the log() function */
   struct op_log {};
+  /** @brief A tag class representing the log2() function */
+  struct op_log2 {};
   /** @brief A tag class representing the log10() function */
   struct op_log10 {};
+  /** @brief A tag class representing the round() function */
+  struct op_round {};
+  /** @brief A tag class representing the rsqrt() function */
+  struct op_rsqrt {};
+  /** @brief A tag class representing the sign() function */
+  struct op_sign {};
   /** @brief A tag class representing the sin() function */
   struct op_sin {};
   /** @brief A tag class representing the sinh() function */
@@ -182,6 +204,8 @@ namespace viennacl
   struct op_tan {};
   /** @brief A tag class representing the tanh() function */
   struct op_tanh {};
+  /** @brief A tag class representing the trunc() function */
+  struct op_trunc {};
 
   /** @brief A tag class representing the (off-)diagonal of a matrix */
   struct op_matrix_diag {};

diff --git a/inst/include/viennacl/linalg/cg.hpp b/inst/include/viennacl/linalg/cg.hpp
@@ -284,7 +284,7 @@ namespace detail
     CPU_NumericType norm_rhs_squared = ip_rr;
     CPU_NumericType new_ipp_rr_over_norm_rhs;
 
-    if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
+    if (std::fabs(norm_rhs_squared) <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
       return result;
 
     for (unsigned int i = 0; i < tag.max_iterations(); ++i)

diff --git a/inst/include/viennacl/linalg/cuda/common.hpp b/inst/include/viennacl/linalg/cuda/common.hpp
@@ -243,6 +243,57 @@ typename viennacl::enable_if< viennacl::is_cpu_scalar<ArgT>::value,
 arg_reference(ArgT, double const & val)  { return val; }
 
 } //namespace detail
+
+template<typename OpT>
+struct cuda_unary_op {};
+
+#define VIENNACL_CUDA_GENERATE_UNARY_OP(FUNCNAME)  \
+template<>\
+struct cuda_unary_op<viennacl::op_##FUNCNAME> \
+{ \
+  template<typename NumericT> \
+  static __device__ NumericT apply(NumericT x) { return FUNCNAME(x); } \
+};
+
+VIENNACL_CUDA_GENERATE_UNARY_OP(abs)
+VIENNACL_CUDA_GENERATE_UNARY_OP(acos)
+VIENNACL_CUDA_GENERATE_UNARY_OP(acosh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(asin)
+VIENNACL_CUDA_GENERATE_UNARY_OP(asinh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(atan)
+VIENNACL_CUDA_GENERATE_UNARY_OP(atanh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(ceil)
+VIENNACL_CUDA_GENERATE_UNARY_OP(cos)
+VIENNACL_CUDA_GENERATE_UNARY_OP(cosh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(erf)
+VIENNACL_CUDA_GENERATE_UNARY_OP(erfc)
+VIENNACL_CUDA_GENERATE_UNARY_OP(exp)
+VIENNACL_CUDA_GENERATE_UNARY_OP(exp2)
+VIENNACL_CUDA_GENERATE_UNARY_OP(exp10)
+VIENNACL_CUDA_GENERATE_UNARY_OP(fabs)
+VIENNACL_CUDA_GENERATE_UNARY_OP(floor)
+VIENNACL_CUDA_GENERATE_UNARY_OP(log)
+VIENNACL_CUDA_GENERATE_UNARY_OP(log2)
+VIENNACL_CUDA_GENERATE_UNARY_OP(log10)
+VIENNACL_CUDA_GENERATE_UNARY_OP(round)
+VIENNACL_CUDA_GENERATE_UNARY_OP(rsqrt)
+//VIENNACL_CUDA_GENERATE_UNARY_OP(sign) //implement manually below
+VIENNACL_CUDA_GENERATE_UNARY_OP(sin)
+VIENNACL_CUDA_GENERATE_UNARY_OP(sinh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(sqrt)
+VIENNACL_CUDA_GENERATE_UNARY_OP(tan)
+VIENNACL_CUDA_GENERATE_UNARY_OP(tanh)
+VIENNACL_CUDA_GENERATE_UNARY_OP(trunc)
+
+template<>
+struct cuda_unary_op<viennacl::op_sign>
+{
+  template<typename NumericT>
+  static __device__ NumericT apply(NumericT x) { return (x > NumericT(0)) ? NumericT(1) : (x < NumericT(0) ? NumericT(-1) : NumericT(0)); }
+};
+
+#undef VIENNACL_CUDA_GENERATE_UNARY_OP
+
 } //namespace cuda
 } //namespace linalg
 } //namespace viennacl