1 change: 1 addition & 0 deletions include/spblas/vendor/cusparse/cusparse.hpp
@@ -1,3 +1,4 @@
#pragma once

#include "multiply.hpp"
#include "simple_op.hpp"
34 changes: 34 additions & 0 deletions include/spblas/vendor/cusparse/exception.hpp
@@ -1,5 +1,6 @@
#pragma once

#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cusparse.h>
#include <stdexcept>
@@ -57,6 +58,39 @@ void throw_if_error(cusparseStatus_t error_code) {
}
}

// Throw an exception if the cublasStatus_t is not CUBLAS_STATUS_SUCCESS.
inline void throw_if_error(cublasStatus_t error_code) {
if (error_code == CUBLAS_STATUS_SUCCESS) {
return;
} else if (error_code == CUBLAS_STATUS_NOT_INITIALIZED) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_INITIALIZED\"");
} else if (error_code == CUBLAS_STATUS_ALLOC_FAILED) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_ALLOC_FAILED\"");
} else if (error_code == CUBLAS_STATUS_INVALID_VALUE) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_INVALID_VALUE\"");
} else if (error_code == CUBLAS_STATUS_ARCH_MISMATCH) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_ARCH_MISMATCH\"");
} else if (error_code == CUBLAS_STATUS_MAPPING_ERROR) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_MAPPING_ERROR\"");
} else if (error_code == CUBLAS_STATUS_EXECUTION_FAILED) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_EXECUTION_FAILED\"");
} else if (error_code == CUBLAS_STATUS_INTERNAL_ERROR) {
throw std::runtime_error("cuBLAS encountered an error: "
"\"CUBLAS_STATUS_INTERNAL_ERROR\"");
} else if (error_code == CUBLAS_STATUS_NOT_SUPPORTED) {
throw std::runtime_error(
"cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_SUPPORTED\"");
} else {
throw std::runtime_error("cuBLAS encountered an error: \"unknown error\"");
}
}

} // namespace __cusparse

} // namespace spblas
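
For reference, a minimal sketch of how the new overload is meant to be used: wrap every cuBLAS call so that a non-success status is converted into a std::runtime_error carrying the status name. The handle creation below is illustrative; the real call sites are in simple_op.hpp.

// Illustrative only: any cublasStatus_t-returning call can be checked this way.
cublasHandle_t handle;
spblas::__cusparse::throw_if_error(cublasCreate(&handle));
// ... issue cuBLAS calls with the handle ...
spblas::__cusparse::throw_if_error(cublasDestroy(handle));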
152 changes: 152 additions & 0 deletions include/spblas/vendor/cusparse/simple_op.hpp
@@ -0,0 +1,152 @@
#pragma once

#include <algorithm>
#include <functional>
#include <memory>
#include <stdexcept>
#include <type_traits>

#include <cublas_v2.h>
#include <cuda_runtime.h>

#include <spblas/detail/operation_info_t.hpp>
#include <spblas/detail/ranges.hpp>
#include <spblas/detail/view_inspectors.hpp>

#include "cuda_allocator.hpp"
#include "detail/cusparse_tensors.hpp"
#include "exception.hpp"
#include "types.hpp"

namespace spblas {

class simple_operation_state_t {
public:
simple_operation_state_t()
: simple_operation_state_t(cusparse::cuda_allocator<char>{}) {}

simple_operation_state_t(cusparse::cuda_allocator<char> alloc)
: alloc_(alloc) {
cublasHandle_t handle;
__cusparse::throw_if_error(cublasCreate(&handle));
if (auto stream = alloc.stream()) {
__cusparse::throw_if_error(cublasSetStream(handle, stream));
}
handle_ = handle_manager(handle, [](cublasHandle_t handle) {
__cusparse::throw_if_error(cublasDestroy(handle));
});
}

simple_operation_state_t(cusparse::cuda_allocator<char> alloc,
cublasHandle_t handle)
: alloc_(alloc) {
handle_ = handle_manager(handle, [](cublasHandle_t handle) {
// The handle is provided by the user; do not destroy it here.
});
}

template <matrix A>
requires __detail::has_csr_base<A>
void scale(typename std::remove_reference_t<A>::scalar_type val, A&& a) {
auto a_base = __detail::get_ultimate_base(a);
using matrix_type = decltype(a_base);
using value_type = typename matrix_type::scalar_type;
if constexpr (std::is_same_v<value_type, float>) {
__cusparse::throw_if_error(
cublasSscal(handle_.get(), static_cast<int>(a_base.values().size()),
&val, a_base.values().data(), 1));
} else if constexpr (std::is_same_v<value_type, double>) {
__cusparse::throw_if_error(
cublasDscal(handle_.get(), static_cast<int>(a_base.values().size()),
&val, a_base.values().data(), 1));
} else {
throw std::runtime_error("not implemented");
}
}

template <matrix A>
requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type matrix_inf_norm(A&& a) {
auto a_base = __detail::get_ultimate_base(a);
using matrix_type = decltype(a_base);
using value_type = typename matrix_type::scalar_type;
using index_type = typename matrix_type::index_type;
value_type result = 0;
// Slow reference implementation: copies each row's extents to the host and
// issues one cuBLAS absolute-sum call per row.
for (int i = 0; i < __backend::shape(a_base)[0]; i++) {
value_type tmp = 0;
index_type start, end;
__cusparse::throw_if_error(cudaMemcpy(&start, a_base.rowptr().data() + i,
sizeof(index_type),
cudaMemcpyDeviceToHost));
__cusparse::throw_if_error(
cudaMemcpy(&end, a_base.rowptr().data() + i + 1, sizeof(index_type),
cudaMemcpyDeviceToHost));
if constexpr (std::is_same_v<value_type, float>) {
__cusparse::throw_if_error(cublasSasum(handle_.get(), end - start,
a_base.values().data() + start,
1, &tmp));
} else if constexpr (std::is_same_v<value_type, double>) {
__cusparse::throw_if_error(cublasDasum(handle_.get(), end - start,
a_base.values().data() + start,
1, &tmp));
} else {
throw std::runtime_error("not implemented");
}
result = std::max(result, tmp);
}
return result;
}

template <matrix A>
requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type matrix_frob_norm(A&& a) {
auto a_base = __detail::get_ultimate_base(a);
using matrix_type = decltype(a_base);
using value_type = typename matrix_type::scalar_type;
value_type result(0.0);
if constexpr (std::is_same_v<value_type, float>) {
__cusparse::throw_if_error(
cublasSnrm2(handle_.get(), static_cast<int>(a_base.values().size()),
a_base.values().data(), 1, &result));
} else if constexpr (std::is_same_v<value_type, double>) {
__cusparse::throw_if_error(
cublasDnrm2(handle_.get(), static_cast<int>(a_base.values().size()),
a_base.values().data(), 1, &result));
} else {
throw std::runtime_error("not implemented");
}
return result;
}

private:
using handle_manager =
std::unique_ptr<std::pointer_traits<cublasHandle_t>::element_type,
std::function<void(cublasHandle_t)>>;
handle_manager handle_;
cusparse::cuda_allocator<char> alloc_;
};

using scale_state_t = simple_operation_state_t;
using matrix_inf_norm_state_t = simple_operation_state_t;
using matrix_frob_norm_state_t = simple_operation_state_t;

template <matrix A>
requires __detail::has_csr_base<A>
void scale(scale_state_t& state,
typename std::remove_reference_t<A>::scalar_type val, A&& a) {
state.scale(val, a);
}

template <matrix A>
requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type
matrix_inf_norm(matrix_inf_norm_state_t& state, A&& a) {
return state.matrix_inf_norm(a);
}

template <matrix A>
requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type
matrix_frob_norm(matrix_frob_norm_state_t& state, A&& a) {
return state.matrix_frob_norm(a);
}

} // namespace spblas
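
To show how these new vendor entry points compose, here is a minimal usage sketch. It assumes a CSR view over device-resident arrays; the umbrella header, the csr_view constructor, and the buffer names are illustrative assumptions rather than part of this diff.

#include <spblas/spblas.hpp> // umbrella header name assumed

// Sketch only: d_values, d_rowptr, d_colind are device pointers describing
// an m x n CSR matrix with nnz nonzeros; the csr_view constructor is assumed.
void simple_ops_example(float* d_values, int* d_rowptr, int* d_colind,
                        int m, int n, int nnz) {
  spblas::csr_view<float, int> a(d_values, d_rowptr, d_colind, {m, n}, nnz);

  spblas::scale_state_t state; // creates and owns its own cublasHandle_t

  spblas::scale(state, 2.0f, a);                        // values *= 2 via cublasSscal
  float inf_norm = spblas::matrix_inf_norm(state, a);   // max over rows of the row abs-sum
  float frob_norm = spblas::matrix_frob_norm(state, a); // 2-norm of the values array
  (void)inf_norm;
  (void)frob_norm;
}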
4 changes: 2 additions & 2 deletions include/spblas/vendor/rocsparse/exception.hpp
@@ -10,7 +10,7 @@ namespace spblas {
namespace __rocsparse {

// Throw an exception if the hipError_t is not hipSuccess.
void throw_if_error(hipError_t error_code, std::string prefix = "") {
inline void throw_if_error(hipError_t error_code, std::string prefix = "") {
if (error_code == hipSuccess) {
return;
}
@@ -21,7 +21,7 @@ void throw_if_error(hipError_t error_code, std::string prefix = "") {
}

// Throw an exception if the rocsparse_status is not rocsparse_status_success.
void throw_if_error(rocsparse_status error_code) {
inline void throw_if_error(rocsparse_status error_code) {
if (error_code == rocsparse_status_success) {
return;
} else if (error_code == rocsparse_status_invalid_handle) {