Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_DynamicThreadedStorage_cuda.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#if defined( __CUDA_ARCH__ )
43
namespace Stokhos {

  //! Dynamic storage specialization for Kokkos::Cuda.
  /*!
   * Coefficients for all threads of a CUDA block are kept in a single
   * dynamically allocated array of length sz*num_threads(), interleaved so
   * that thread t owns entries t, t+stride, t+2*stride, ...  Each thread's
   * coeff_ pointer is pre-offset by its thread index, so every loop below of
   * the form "for (i=0; i<total_sz_; i+=stride_)" touches exactly sz_
   * elements belonging to the calling thread.
   *
   * NOTE(review): the helper type 'ds' used for allocation/deallocation
   * (ds::get_and_fill / ds::destroy_and_release) is declared outside this
   * file section -- presumably a dynamic-array traits class; confirm its
   * typedef is in scope wherever this header is included.
   *
   * NOTE(review): allocate_coeff_array()/destroy_coeff_array() call
   * __syncthreads(), so construction, destruction, assignment with a size
   * change, resize() and shallowReset() must be reached by ALL threads of
   * the block (no divergent control flow) -- verify at call sites.
   */
  template <typename ordinal_t, typename value_t>
  class DynamicThreadedStorage<ordinal_t, value_t, Kokkos::Cuda> {
  public:

    static const bool is_static = false;
    static const int static_size = 0;
    static const bool supports_reset = true;

    typedef ordinal_t ordinal_type;
    typedef value_t value_type;
    typedef Kokkos::Cuda execution_space;
    typedef value_type& reference;
    typedef volatile value_type& volatile_reference;
    typedef const value_type& const_reference;
    typedef const volatile value_type& const_volatile_reference;
    typedef value_type* pointer;
    typedef volatile value_type* volatile_pointer;
    typedef const value_type* const_pointer;
    typedef const volatile value_type* const_volatile_pointer;

    //! Turn DynamicThreadedStorage into a meta-function class usable with mpl::apply
    template <typename ord_t, typename val_t = value_t , typename dev_t = Kokkos::Cuda >
    struct apply {
      typedef DynamicThreadedStorage<ord_t,val_t,dev_t> type;
    };

    //! Constructor:  allocate sz coefficients per thread, filled with x
    __device__
    DynamicThreadedStorage(const ordinal_type& sz = 1,
                           const value_type& x = value_type(0.0)) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }

    //! Constructor:  allocate sz coefficients per thread, initialized from array x
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, const value_type* x) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }

    //! Constructor wrapping an externally supplied coefficient array
    /*!
     * No allocation is performed; v is adopted directly.  'owned' controls
     * whether the destructor releases it.
     * NOTE(review): v is presumably already offset for the calling thread
     * (matching what allocate_coeff_array produces) -- confirm at callers.
     */
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, pointer v, bool owned) :
      coeff_(v), sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_),
      is_owned_(owned) {}

    //! Copy constructor:  deep-copies this thread's sz_ coefficients
    __device__
    DynamicThreadedStorage(const DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }

    //! Copy constructor from a volatile source (deep copy)
    __device__
    DynamicThreadedStorage(const volatile DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }

    //! Destructor:  releases the coefficient array if this thread owns it
    __device__
    ~DynamicThreadedStorage() {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
    }

    //! Assignment operator
    /*!
     * Reallocates only when the per-thread size differs; otherwise copies
     * in place.  Self-assignment is a no-op.
     */
    __device__
    DynamicThreadedStorage& operator=(const DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment from a volatile source
    __device__
    DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment to a volatile destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment, volatile source and volatile destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Initialize all of this thread's coefficients to the value v
    __device__
    void init(const_reference v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize all of this thread's coefficients to the value v (volatile)
    __device__
    void init(const_reference v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize coefficients from the array v
    /*!
     * sz == 0 (the default) means "all sz_ coefficients".
     * NOTE(review): v is read with the same strided indices used for
     * coeff_, i.e. v is expected to be laid out/offset exactly like the
     * internal array (not a compact per-thread array) -- confirm at callers.
     */
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Initialize coefficients from the array v (volatile)
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) volatile {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Load coefficients from v into the internal array
    /*!
     * NOTE(review): despite the name, this copies v INTO coeff_ (same
     * direction as init), not out of it -- verify intended semantics
     * against the other storage classes' load().
     */
    __device__
    void load(pointer v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Load coefficients from v into the internal array (volatile)
    __device__
    void load(pointer v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Resize to a new per-thread coefficient count
    /*!
     * Allocates a new array, copies min(old,new) strided entries, then
     * frees the old array.  No-op when sz == sz_.
     */
    __device__
    void resize(const ordinal_type& sz) {
      if (sz != sz_) {
        value_type *coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }

    //! Resize to a new per-thread coefficient count (volatile)
    __device__
    void resize(const ordinal_type& sz) volatile {
      if (sz != sz_) {
        value_type *coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }

    //! Reset storage to wrap the given array with the given size and stride
    /*!
     * Frees the current array (if owned) and adopts v without copying.
     * Note the stride is taken from the argument here, unlike the
     * constructors, which always use num_threads().
     */
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }

    //! Reset storage to wrap the given array (volatile)
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) volatile {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }

    //! Return per-thread size
    __device__
    ordinal_type size() const { return sz_; }

    //! Return per-thread size (volatile)
    __device__
    ordinal_type size() const volatile { return sz_; }

    //! Coefficient access:  i-th coefficient of the calling thread
    KOKKOS_INLINE_FUNCTION
    const_reference operator[] (const ordinal_type& i) const {
      return coeff_[i*stride_];
    }

    //! Coefficient access (volatile)
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference operator[] (const ordinal_type& i) const volatile {
      return coeff_[i*stride_];
    }

    //! Coefficient access
    KOKKOS_INLINE_FUNCTION
    reference operator[] (const ordinal_type& i) {
      return coeff_[i*stride_];
    }

    //! Coefficient access (volatile)
    KOKKOS_INLINE_FUNCTION
    volatile_reference operator[] (const ordinal_type& i) volatile {
      return coeff_[i*stride_];
    }

    //! Compile-time-indexed coefficient access
    template <int i>
    KOKKOS_INLINE_FUNCTION
    reference getCoeff() { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    volatile_reference getCoeff() volatile { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    const_reference getCoeff() const { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference getCoeff() const volatile { return coeff_[i*stride_]; }

    //! Get this thread's (pre-offset) coefficient array
    KOKKOS_INLINE_FUNCTION
    const_volatile_pointer coeff() const volatile { return coeff_; }

    //! Get this thread's (pre-offset) coefficient array
    KOKKOS_INLINE_FUNCTION
    const_pointer coeff() const { return coeff_; }

    //! Get this thread's (pre-offset) coefficient array
    KOKKOS_INLINE_FUNCTION
    volatile_pointer coeff() volatile { return coeff_; }

    //! Get this thread's (pre-offset) coefficient array
    KOKKOS_INLINE_FUNCTION
    pointer coeff() { return coeff_; }

  protected:

    //! Total number of threads in the CUDA block
    __device__
    ordinal_type num_threads() const {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Total number of threads in the CUDA block (volatile)
    __device__
    ordinal_type num_threads() const volatile {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Flat (linearized) index of the calling thread within the block
    __device__
    ordinal_type thread_index() const {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }

    //! Flat (linearized) index of the calling thread within the block (volatile)
    __device__
    ordinal_type thread_index() const volatile {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }

    //! Allocate coefficient array of given size, filled with scalar x
    /*!
     * Thread 0 allocates and broadcasts the base pointer through a
     * __shared__ slot; every thread then takes the pointer offset by its
     * own index.  Only thread 0 gets owned == true, so only it frees the
     * array later.  Must be called by all threads of the block (contains
     * __syncthreads()).
     * NOTE(review): there is no barrier AFTER "c = ptr + tidx", so safe
     * reuse of the shared 'ptr' slot by a later call relies on an
     * intervening barrier (e.g. the one at the top of
     * destroy_coeff_array) -- verify all call sequences provide one.
     */
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) {

      // Allocate coefficient array on thread 0
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(total_size,x);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Allocate coefficient array, filled with scalar x (volatile)
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) volatile {

      // Allocate coefficient array on thread 0
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(total_size,x);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Allocate coefficient array, initialized from the array x
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type* x) {

      // Allocate coefficient array on thread 0
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(x, total_size);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Destroy coefficient array
    /*!
     * The leading __syncthreads() makes sure every thread is done using
     * the array before the owning thread (thread 0, per
     * allocate_coeff_array) releases it.  Must be called by all threads
     * of the block.
     */
    __device__
    void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) {
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

    //! Destroy coefficient array (volatile)
    __device__
    void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) volatile {
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

  private:

    //! Coefficient values (base pointer pre-offset by this thread's index)
    pointer coeff_;

    //! Per-thread size of array
    ordinal_type sz_;

    //! Stride between this thread's consecutive coefficients
    ordinal_type stride_;

    //! Total size of array across all threads of the block (sz_*stride_)
    ordinal_type total_sz_;

    //! True only for the thread responsible for freeing the array
    bool is_owned_;

  };

}
508
509#endif
Kokkos::DefaultExecutionSpace execution_space
Top-level namespace for Stokhos classes and functions.
Dynamic array allocation class that is specialized for scalar i.e., fundamental or built-in types (fl...