17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22#ifndef KOKKOS_CUDASPACE_HPP
23#define KOKKOS_CUDASPACE_HPP
25#include <Kokkos_Macros.hpp>
26#if defined(KOKKOS_ENABLE_CUDA)
28#include <Kokkos_Core_fwd.hpp>
35#include <Kokkos_HostSpace.hpp>
36#include <impl/Kokkos_SharedAlloc.hpp>
38#include <impl/Kokkos_Profiling_Interface.hpp>
40#include <Cuda/Kokkos_Cuda_abort.hpp>
42#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
// C-linkage accessors; in this listing they sit under
// #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST, so they are only declared
// when that macro is defined.
// Getter: no arguments, returns bool.
// NOTE(review): presumably reports whether UVM allocations are pinned to the
// host -- confirm against the definition, which is not visible here.
43extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
// Setter: takes a single (unnamed) bool, returns nothing.
44extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(
bool);
53struct is_cuda_type_space :
public std::false_type {};
62 using memory_space = CudaSpace;
66 using size_type =
unsigned int;
// Move/copy construction, move/copy assignment and destruction are all
// explicitly defaulted, i.e. compiler-generated.
71 CudaSpace(CudaSpace&& rhs) =
default;
72 CudaSpace(
const CudaSpace& rhs) =
default;
73 CudaSpace& operator=(CudaSpace&& rhs) =
default;
74 CudaSpace& operator=(
const CudaSpace& rhs) =
default;
75 ~CudaSpace() =
default;
// Allocation overloads. Declarations only: the definitions are not part of
// this listing. Every overload is a const member function returning void*.
// NOTE(review): sizes are presumably in bytes -- confirm in the definitions.
//
// Overload taking a Cuda execution-space instance and an allocation size.
78 void* allocate(
const Cuda& exec_space,
const size_t arg_alloc_size)
const;
// Overload taking a Cuda instance, a label, an allocation size and an
// optional logical size (defaults to 0).
79 void* allocate(
const Cuda& exec_space,
const char* arg_label,
80 const size_t arg_alloc_size,
81 const size_t arg_logical_size = 0)
const;
// Overload taking only an allocation size.
82 void* allocate(
const size_t arg_alloc_size)
const;
// Overload taking a label, an allocation size and an optional logical size
// (defaults to 0).
83 void* allocate(
const char* arg_label,
const size_t arg_alloc_size,
84 const size_t arg_logical_size = 0)
const;
// Deallocation overloads. Declarations only; both are const member functions
// returning void.
//
// Overload taking the pointer to release and the allocation size.
87 void deallocate(
void*
const arg_alloc_ptr,
const size_t arg_alloc_size)
const;
// Overload that additionally takes a label and an optional logical size
// (defaults to 0).
88 void deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
89 const size_t arg_alloc_size,
90 const size_t arg_logical_size = 0)
const;
93 template <
class,
class,
class,
class>
// impl_allocate overloads (declarations only, const, return void*).
// Both accept a trailing unnamed Kokkos::Tools::SpaceHandle whose default is
// Kokkos::Tools::make_space_handle(name()); arg_logical_size defaults to 0.
//
// Overload taking a Cuda execution-space instance.
95 void* impl_allocate(
const Cuda& exec_space,
const char* arg_label,
96 const size_t arg_alloc_size,
97 const size_t arg_logical_size = 0,
98 const Kokkos::Tools::SpaceHandle =
99 Kokkos::Tools::make_space_handle(name()))
const;
// Overload without an execution-space instance.
100 void* impl_allocate(
const char* arg_label,
const size_t arg_alloc_size,
101 const size_t arg_logical_size = 0,
102 const Kokkos::Tools::SpaceHandle =
103 Kokkos::Tools::make_space_handle(name()))
const;
// impl_deallocate (declaration only, const, returns void).
// Takes a label, the pointer to release, the allocation size, an optional
// logical size (default 0) and an unnamed Kokkos::Tools::SpaceHandle that
// defaults to Kokkos::Tools::make_space_handle(name()).
104 void impl_deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
105 const size_t arg_alloc_size,
106 const size_t arg_logical_size = 0,
107 const Kokkos::Tools::SpaceHandle =
108 Kokkos::Tools::make_space_handle(name()))
const;
// Returns the class's static m_name string; usable in constant expressions.
112 static constexpr const char* name() {
return m_name; }
117 static constexpr const char* m_name =
"Cuda";
118 friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
122struct Impl::is_cuda_type_space<CudaSpace> :
public std::true_type {};
137 using memory_space = CudaUVMSpace;
138 using execution_space = Cuda;
140 using size_type =
unsigned int;
142#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// Static query returning bool, marked KOKKOS_DEPRECATED. In this listing it
// sits under #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4.
// NOTE(review): what "available" tests is not visible here -- see definition.
144 KOKKOS_DEPRECATED
static bool available();
// Move/copy construction, move/copy assignment and destruction are all
// explicitly defaulted, i.e. compiler-generated.
152 CudaUVMSpace(CudaUVMSpace&& rhs) =
default;
153 CudaUVMSpace(
const CudaUVMSpace& rhs) =
default;
154 CudaUVMSpace& operator=(CudaUVMSpace&& rhs) =
default;
155 CudaUVMSpace& operator=(
const CudaUVMSpace& rhs) =
default;
156 ~CudaUVMSpace() =
default;
// Allocation overloads (declarations only, const, return void*).
// NOTE(review): sizes are presumably in bytes -- confirm in the definitions.
//
// Overload taking only an allocation size.
159 void* allocate(
const size_t arg_alloc_size)
const;
// Overload taking a label, an allocation size and an optional logical size
// (defaults to 0).
160 void* allocate(
const char* arg_label,
const size_t arg_alloc_size,
161 const size_t arg_logical_size = 0)
const;
// Deallocation overloads (declarations only, const, return void).
//
// Overload taking the pointer to release and the allocation size.
164 void deallocate(
void*
const arg_alloc_ptr,
const size_t arg_alloc_size)
const;
// Overload that additionally takes a label and an optional logical size
// (defaults to 0).
165 void deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
166 const size_t arg_alloc_size,
167 const size_t arg_logical_size = 0)
const;
170 template <
class,
class,
class,
class>
// impl_allocate (declaration only, const, returns void*).
// arg_logical_size defaults to 0; the trailing unnamed
// Kokkos::Tools::SpaceHandle defaults to make_space_handle(name()).
172 void* impl_allocate(
const char* arg_label,
const size_t arg_alloc_size,
173 const size_t arg_logical_size = 0,
174 const Kokkos::Tools::SpaceHandle =
175 Kokkos::Tools::make_space_handle(name()))
const;
// impl_deallocate (declaration only, const, returns void).
// Takes a label, the pointer to release, the allocation size, an optional
// logical size (default 0) and an unnamed Kokkos::Tools::SpaceHandle that
// defaults to Kokkos::Tools::make_space_handle(name()).
176 void impl_deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
177 const size_t arg_alloc_size,
178 const size_t arg_logical_size = 0,
179 const Kokkos::Tools::SpaceHandle =
180 Kokkos::Tools::make_space_handle(name()))
const;
// Returns the class's static m_name string; usable in constant expressions.
184 static constexpr const char* name() {
return m_name; }
186#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
// Static getter/setter pair; in this listing they sit under
// #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST.
// Getter: no arguments, returns bool.
// NOTE(review): presumably reads the pin-UVM-to-host flag -- confirm in the
// definition, which is not visible here.
187 static bool cuda_pin_uvm_to_host();
// Setter: takes the new bool value, returns nothing.
188 static void cuda_set_pin_uvm_to_host(
bool val);
195#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
196 static bool kokkos_impl_cuda_pin_uvm_to_host_v;
198 static constexpr const char* m_name =
"CudaUVM";
202struct Impl::is_cuda_type_space<CudaUVMSpace> :
public std::true_type {};
214class CudaHostPinnedSpace {
219 using memory_space = CudaHostPinnedSpace;
221 using size_type =
unsigned int;
// Default constructor is declared here and defined elsewhere (not visible).
225 CudaHostPinnedSpace();
// Move/copy construction, move/copy assignment and destruction are all
// explicitly defaulted, i.e. compiler-generated.
226 CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) =
default;
227 CudaHostPinnedSpace(
const CudaHostPinnedSpace& rhs) =
default;
228 CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) =
default;
229 CudaHostPinnedSpace& operator=(
const CudaHostPinnedSpace& rhs) =
default;
230 ~CudaHostPinnedSpace() =
default;
// Allocation overloads (declarations only, const, return void*).
// NOTE(review): sizes are presumably in bytes -- confirm in the definitions.
//
// Overload taking only an allocation size.
233 void* allocate(
const size_t arg_alloc_size)
const;
// Overload taking a label, an allocation size and an optional logical size
// (defaults to 0).
234 void* allocate(
const char* arg_label,
const size_t arg_alloc_size,
235 const size_t arg_logical_size = 0)
const;
// Deallocation overloads (declarations only, const, return void).
//
// Overload taking the pointer to release and the allocation size.
238 void deallocate(
void*
const arg_alloc_ptr,
const size_t arg_alloc_size)
const;
// Overload that additionally takes a label and an optional logical size
// (defaults to 0).
239 void deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
240 const size_t arg_alloc_size,
241 const size_t arg_logical_size = 0)
const;
244 template <
class,
class,
class,
class>
// impl_allocate (declaration only, const, returns void*).
// arg_logical_size defaults to 0; the trailing unnamed
// Kokkos::Tools::SpaceHandle defaults to make_space_handle(name()).
246 void* impl_allocate(
const char* arg_label,
const size_t arg_alloc_size,
247 const size_t arg_logical_size = 0,
248 const Kokkos::Tools::SpaceHandle =
249 Kokkos::Tools::make_space_handle(name()))
const;
// impl_deallocate (declaration only, const, returns void).
// Takes a label, the pointer to release, the allocation size, an optional
// logical size (default 0) and an unnamed Kokkos::Tools::SpaceHandle that
// defaults to Kokkos::Tools::make_space_handle(name()).
250 void impl_deallocate(
const char* arg_label,
void*
const arg_alloc_ptr,
251 const size_t arg_alloc_size,
252 const size_t arg_logical_size = 0,
253 const Kokkos::Tools::SpaceHandle =
254 Kokkos::Tools::make_space_handle(name()))
const;
// Returns the class's static m_name string; usable in constant expressions.
258 static constexpr const char* name() {
return m_name; }
261 static constexpr const char* m_name =
"CudaHostPinned";
267struct Impl::is_cuda_type_space<CudaHostPinnedSpace> :
public std::true_type {};
// Declaration only: takes no arguments and returns a cudaStream_t.
// NOTE(review): by its name this is the stream used for deep copies --
// confirm against the definition.
277cudaStream_t cuda_get_deep_copy_stream();
// Declaration only: returns a const reference to a
// std::unique_ptr<Kokkos::Cuda>. `initialize` defaults to true.
// NOTE(review): the effect of initialize == false is not visible here.
279const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
280 bool initialize =
true);
297 enum :
bool { assignable =
false };
298 enum :
bool { accessible =
false };
299 enum :
bool { deepcopy =
true };
305 enum :
bool { assignable =
false };
306 enum :
bool { accessible =
true };
307 enum :
bool { deepcopy =
true };
313 enum :
bool { assignable =
true };
314 enum :
bool { accessible =
true };
315 enum :
bool { deepcopy =
true };
322 enum :
bool { assignable =
false };
323 enum :
bool { accessible =
false };
324 enum :
bool { deepcopy =
true };
330 enum :
bool { assignable =
true };
331 enum :
bool { accessible =
true };
332 enum :
bool { deepcopy =
true };
338 enum :
bool { assignable =
false };
339 enum :
bool { accessible =
true };
340 enum :
bool { deepcopy =
true };
349 enum :
bool { assignable =
false };
350 enum :
bool { accessible =
false };
351 enum :
bool { deepcopy =
true };
358 enum :
bool { assignable =
false };
361 enum :
bool { accessible =
true };
362 enum :
bool { deepcopy =
true };
368 enum :
bool { assignable =
false };
369 enum :
bool { accessible =
true };
370 enum :
bool { deepcopy =
true };
379 enum :
bool { assignable =
false };
380 enum :
bool { accessible =
true };
381 enum :
bool { deepcopy =
true };
386 enum :
bool { assignable =
false };
387 enum :
bool { accessible =
false };
388 enum :
bool { deepcopy =
true };
393 enum :
bool { assignable =
false };
394 enum :
bool { accessible =
true };
395 enum :
bool { deepcopy =
true };
// Declaration only: copy routine taking a destination pointer, a source
// pointer and a size n.
// NOTE(review): n is presumably a byte count and the call presumably blocks
// until the copy completes -- confirm in the definition.
409void DeepCopyCuda(
void* dst,
const void* src,
size_t n);
410void DeepCopyAsyncCuda(
const Cuda& instance,
void* dst,
const void* src,
// Declaration only: DeepCopyAsyncCuda overload that takes no execution-space
// instance, just destination, source and size n.
// NOTE(review): which stream/instance this overload uses is not visible here.
412void DeepCopyAsyncCuda(
void* dst,
const void* src,
size_t n);
// DeepCopy partial specialization <MemSpace, HostSpace, Cuda>, enabled only
// when is_cuda_type_space<MemSpace>::value is true.
// NOTE(review): the first two template arguments are presumably
// <destination, source> -- confirm against the primary template.
// NOTE(review): the closing braces of the second constructor and of the
// struct are not present in this listing.
414template <
class MemSpace>
415struct DeepCopy<MemSpace, HostSpace, Cuda,
416 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
// Constructor without an instance: forwards (dst, src, n) to DeepCopyCuda.
417 DeepCopy(
void* dst,
const void* src,
size_t n) { DeepCopyCuda(dst, src, n); }
// Constructor with a Cuda instance: forwards (instance, dst, src, n) to
// DeepCopyAsyncCuda.
418 DeepCopy(
const Cuda& instance,
void* dst,
const void* src,
size_t n) {
419 DeepCopyAsyncCuda(instance, dst, src, n);
// DeepCopy partial specialization <HostSpace, MemSpace, Cuda>, enabled only
// when is_cuda_type_space<MemSpace>::value is true.
// NOTE(review): the first two template arguments are presumably
// <destination, source> -- confirm against the primary template.
// NOTE(review): the closing braces of the second constructor and of the
// struct are not present in this listing.
423template <
class MemSpace>
424struct DeepCopy<HostSpace, MemSpace, Cuda,
425 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
// Constructor without an instance: forwards (dst, src, n) to DeepCopyCuda.
426 DeepCopy(
void* dst,
const void* src,
size_t n) { DeepCopyCuda(dst, src, n); }
// Constructor with a Cuda instance: forwards (instance, dst, src, n) to
// DeepCopyAsyncCuda.
427 DeepCopy(
const Cuda& instance,
void* dst,
const void* src,
size_t n) {
428 DeepCopyAsyncCuda(instance, dst, src, n);
// DeepCopy partial specialization <MemSpace1, MemSpace2, Cuda>, enabled only
// when is_cuda_type_space is true for both MemSpace1 and MemSpace2.
// NOTE(review): the closing braces of the second constructor and of the
// struct are not present in this listing.
432template <
class MemSpace1,
class MemSpace2>
433struct DeepCopy<MemSpace1, MemSpace2, Cuda,
434 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
435 is_cuda_type_space<MemSpace2>::value>> {
// Constructor without an instance: forwards (dst, src, n) to DeepCopyCuda.
436 DeepCopy(
void* dst,
const void* src,
size_t n) { DeepCopyCuda(dst, src, n); }
// Constructor with a Cuda instance: forwards (instance, dst, src, n) to
// DeepCopyAsyncCuda.
437 DeepCopy(
const Cuda& instance,
void* dst,
const void* src,
size_t n) {
438 DeepCopyAsyncCuda(instance, dst, src, n);
442template <
class MemSpace1,
class MemSpace2,
class ExecutionSpace>
443struct DeepCopy<MemSpace1, MemSpace2, ExecutionSpace,
444 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
445 is_cuda_type_space<MemSpace2>::value &&
446 !std::is_same<ExecutionSpace, Cuda>::value>> {
447 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
448 DeepCopyCuda(dst, src, n);
451 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
453 exec.fence(fence_string());
454 DeepCopyAsyncCuda(dst, src, n);
458 static const std::string& fence_string() {
459 static const std::string
string =
460 std::string(
"Kokkos::Impl::DeepCopy<") + MemSpace1::name() +
"Space, " +
462 "Space, ExecutionSpace>::DeepCopy: fence before copy";
467template <
class MemSpace,
class ExecutionSpace>
468struct DeepCopy<MemSpace, HostSpace, ExecutionSpace,
469 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
470 !std::is_same<ExecutionSpace, Cuda>::value>> {
471 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
472 DeepCopyCuda(dst, src, n);
475 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
477 exec.fence(fence_string());
478 DeepCopyAsyncCuda(dst, src, n);
482 static const std::string& fence_string() {
483 static const std::string
string =
484 std::string(
"Kokkos::Impl::DeepCopy<") + MemSpace::name() +
485 "Space, HostSpace, ExecutionSpace>::DeepCopy: fence before copy";
490template <
class MemSpace,
class ExecutionSpace>
491struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
492 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
493 !std::is_same<ExecutionSpace, Cuda>::value>> {
494 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
495 DeepCopyCuda(dst, src, n);
498 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
500 exec.fence(fence_string());
501 DeepCopyAsyncCuda(dst, src, n);
505 static const std::string& fence_string() {
506 static const std::string
string =
507 std::string(
"Kokkos::Impl::DeepCopy<HostSpace, ") + MemSpace::name() +
508 "Space, ExecutionSpace>::DeepCopy: fence before copy";
523class SharedAllocationRecord<Kokkos::CudaSpace, void>
524 :
public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
526 friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
527 friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
528 friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
530 using RecordBase = SharedAllocationRecord<void, void>;
532 HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
534 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
535 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
// Static declaration (definition not visible): takes the size of the alias
// type, the allocation pointer and the allocation size, and returns a
// ::cudaTextureObject_t.
537 static ::cudaTextureObject_t attach_texture_object(
538 const unsigned sizeof_alias, void* const alloc_ptr,
539 const size_t alloc_size);
541#ifdef KOKKOS_ENABLE_DEBUG
542 static RecordBase s_root_record;
545 ::cudaTextureObject_t m_tex_obj = 0;
549 ~SharedAllocationRecord();
550 SharedAllocationRecord() = default;
556 template <typename ExecutionSpace>
557 SharedAllocationRecord(
558 const ExecutionSpace& , const Kokkos::CudaSpace& arg_space,
559 const std::string& arg_label, const size_t arg_alloc_size,
560 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
562#ifdef KOKKOS_ENABLE_DEBUG
563 &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record,
565 Impl::checked_allocation_with_header(arg_space, arg_label,
567 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
572 SharedAllocationHeader header;
574 this->base_t::_fill_host_accessible_header_info(header, arg_label);
579 deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &header);
582 SharedAllocationRecord(
584 const std::string& arg_label,
const size_t arg_alloc_size,
585 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
587 SharedAllocationRecord(
589 const size_t arg_alloc_size,
590 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
// Static declaration (definition not visible) taking an unnamed void* and an
// unnamed const void*.
// NOTE(review): presumably (destination, source) for copying an allocation
// header without an execution-space instance -- confirm in the definition.
594 static void deep_copy_header_no_exec(
void*,
const void*);
// Template overload returning a ::cudaTextureObject_t for AliasType.
// The static_assert rejects any AliasType other than int, ::int2 or ::int4.
// NOTE(review): the tail of this function (return statement and closing
// braces) is not present in this listing.
597 template <
typename AliasType>
598 inline ::cudaTextureObject_t attach_texture_object() {
599 static_assert((std::is_same<AliasType, int>::value ||
600 std::is_same<AliasType, ::int2>::value ||
601 std::is_same<AliasType, ::int4>::value),
602 "Cuda texture fetch only supported for alias types of int, "
603 "::int2, or ::int4");
// Only call the static attach_texture_object when m_tex_obj is still 0, and
// store the result in m_tex_obj.
// NOTE(review): the pointer is converted with a C-style (void*) cast.
605 if (m_tex_obj == 0) {
606 m_tex_obj = attach_texture_object(
sizeof(AliasType),
607 (
void*)RecordBase::m_alloc_ptr,
608 RecordBase::m_alloc_size);
// Returns the distance, in AliasType elements, from RecordBase::m_alloc_ptr
// (reinterpreted as AliasType*) to ptr.
// NOTE(review): the pointer difference has type std::ptrdiff_t and is
// implicitly narrowed to the int return type.
// NOTE(review): the closing brace is not present in this listing.
614 template <
typename AliasType>
615 inline int attach_texture_object_offset(
const AliasType*
const ptr) {
617 return ptr -
reinterpret_cast<AliasType*
>(RecordBase::m_alloc_ptr);
622class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
623 :
public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
625 friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
627 using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
628 using RecordBase = SharedAllocationRecord<void, void>;
630 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
631 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
633 static RecordBase s_root_record;
635 ::cudaTextureObject_t m_tex_obj = 0;
639 ~SharedAllocationRecord();
640 SharedAllocationRecord() = default;
646 template <typename ExecutionSpace>
647 SharedAllocationRecord(
648 const ExecutionSpace& ,
649 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
650 const size_t arg_alloc_size,
651 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
653#ifdef KOKKOS_ENABLE_DEBUG
654 &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record,
656 Impl::checked_allocation_with_header(arg_space, arg_label,
658 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
662 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
666 SharedAllocationRecord(
668 const size_t arg_alloc_size,
669 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
// Template overload returning a ::cudaTextureObject_t for AliasType.
// The static_assert rejects any AliasType other than int, ::int2 or ::int4.
// NOTE(review): the tail of this function (return statement and closing
// braces) is not present in this listing.
672 template <
typename AliasType>
673 inline ::cudaTextureObject_t attach_texture_object() {
674 static_assert((std::is_same<AliasType, int>::value ||
675 std::is_same<AliasType, ::int2>::value ||
676 std::is_same<AliasType, ::int4>::value),
677 "Cuda texture fetch only supported for alias types of int, "
678 "::int2, or ::int4");
// Only when m_tex_obj is still 0, call the static attach_texture_object of
// SharedAllocationRecord<Kokkos::CudaSpace, void> and store the result.
// NOTE(review): the pointer is converted with a C-style (void*) cast.
680 if (m_tex_obj == 0) {
681 m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
682 attach_texture_object(
sizeof(AliasType),
683 (
void*)RecordBase::m_alloc_ptr,
684 RecordBase::m_alloc_size);
// Returns the distance, in AliasType elements, from RecordBase::m_alloc_ptr
// (reinterpreted as AliasType*) to ptr.
// NOTE(review): the pointer difference has type std::ptrdiff_t and is
// implicitly narrowed to the int return type.
// NOTE(review): the closing brace is not present in this listing.
690 template <
typename AliasType>
691 inline int attach_texture_object_offset(
const AliasType*
const ptr) {
693 return ptr -
reinterpret_cast<AliasType*
>(RecordBase::m_alloc_ptr);
698class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
699 :
public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
701 friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
703 using RecordBase = SharedAllocationRecord<void, void>;
704 using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
706 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
707 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
709 static RecordBase s_root_record;
714 ~SharedAllocationRecord();
715 SharedAllocationRecord() = default;
721 template <typename ExecutionSpace>
722 SharedAllocationRecord(
723 const ExecutionSpace& ,
724 const Kokkos::CudaHostPinnedSpace& arg_space,
725 const std::string& arg_label, const size_t arg_alloc_size,
726 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
728#ifdef KOKKOS_ENABLE_DEBUG
729 &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
730 void>::s_root_record,
732 Impl::checked_allocation_with_header(arg_space, arg_label,
734 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
737 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
741 SharedAllocationRecord(
743 const std::string& arg_label,
const size_t arg_alloc_size,
744 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
A thread-safe view to a bitset.
LogicalMemorySpace is a space that is identical to another space, but differentiable by name and template argument.
Memory management for host memory.
DefaultHostExecutionSpace execution_space
Default execution space for this memory space.
bool available()
Query if hwloc is available.
Access relationship between DstMemorySpace and SrcMemorySpace.