Intel(R) Threading Building Blocks Doxygen Documentation version 4.2.3
Loading...
Searching...
No Matches
parallel_scan.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_scan_H
18#define __TBB_parallel_scan_H
19
20#define __TBB_parallel_scan_H_include_area
22
23#include "task.h"
24#include "aligned_space.h"
25#include <new>
26#include "partitioner.h"
27
28namespace tbb {
29
31
// NOTE(review): Doxygen listing fragment — the enclosing 'struct pre_scan_tag'
// declaration line (parallel_scan.h:32) is a collapsed link, missing from this dump.
//! Tag type indicating that the initial (pre) scan pass is being performed.
33 static bool is_final_scan() {return false;}
// Boolean conversion so the tag can be tested directly; always false for pre-scan.
34 operator bool() {return is_final_scan();}
35};
36
38
// NOTE(review): Doxygen listing fragment — the enclosing 'struct final_scan_tag'
// declaration line (parallel_scan.h:39) is a collapsed link, missing from this dump.
//! Tag type indicating that the final scan pass is being performed.
40 static bool is_final_scan() {return true;}
// Boolean conversion mirrors pre_scan_tag; always true for the final scan.
41 operator bool() {return is_final_scan();}
42};
43
45namespace internal {
46
48
//! Performs the final scan for a leaf subrange (per the symbol index).
// NOTE(review): listing lines 55-56, 61, 63 and 71 are collapsed in this dump.
// Per the symbol index they declare 'Body* my_stuff_last' ("Where to put result
// of last subrange, or NULL if not last subrange"), the destructor signature,
// and 'task* execute() __TBB_override' — confirm against the real header.
49 template<typename Range, typename Body>
50 class final_sum: public task {
51 public:
52 Body my_body;
53 private:
// Range storage is deferred: constructed later via finish_construction(),
// destroyed explicitly in the destructor below.
54 aligned_space<Range> my_range;
57 public:
// Splitting-constructs my_body from body_; the Range is NOT yet constructed here.
58 final_sum( Body& body_ ) :
59 my_body(body_,split())
60 {
62 }
// Destructor body (signature line 63 collapsed): destroy the placement-new'd Range.
64 my_range.begin()->~Range();
65 }
// Placement-constructs the Range and records where to deliver the last
// subrange's result (stuff_last_ may be NULL — see my_stuff_last above).
66 void finish_construction( const Range& range_, Body* stuff_last_ ) {
67 new( my_range.begin() ) Range(range_);
68 my_stuff_last = stuff_last_;
69 }
70 private:
// execute() body (signature line 71 collapsed): run the final scan over the
// stored range, then publish the body's state to my_stuff_last if set.
72 my_body( *my_range.begin(), final_scan_tag() );
73 if( my_stuff_last )
74 my_stuff_last->assign(my_body);
75 return NULL;
76 }
77 };
78
80
//! Splits work to be done in the scan (per the symbol index).
// NOTE(review): listing lines 83-92, 103-104, 118 and 122-125 are collapsed in
// this dump. Per the symbol index they declare the final_sum_type typedef and
// the members my_incoming, my_body, my_left_sum, my_left, my_right, my_stuff_last
// and my_left_is_final, poison the second-pass pointers, and open
// 'task* execute() __TBB_override' plus the two create_child calls producing
// tasks 'a' and 'b' — confirm against the real header.
81 template<typename Range, typename Body>
82 class sum_node: public task {
84 public:
88 private:
93 Range my_range;
// First-pass constructor: link pointers start NULL; fields set later by the
// second pass (see create_child / execute below).
94 sum_node( const Range range_, bool left_is_final_ ) :
95 my_stuff_last(NULL),
96 my_left_sum(NULL),
97 my_left(NULL),
98 my_right(NULL),
99 my_left_is_final(left_is_final_),
100 my_range(range_)
101 {
102 // Poison fields that will be set by second pass.
105 }
// Returns the task to run for a subrange: either f recycled as our child
// (leaf case, n == NULL) or the existing sum_node n, wired up with its
// incoming sum and result destination.
106 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
107 if( !n ) {
108 f.recycle_as_child_of( *this );
109 f.finish_construction( range_, stuff_last_ );
110 return &f;
111 } else {
112 n->my_body = &f;
113 n->my_incoming = incoming_;
114 n->my_stuff_last = stuff_last_;
115 return n;
116 }
117 }
// execute() body (signature line 118 collapsed): second pass — fold the
// incoming prefix into the left sum, create children 'a'/'b' (creation lines
// 124-125 collapsed), spawn the right child and return the left for bypass.
119 if( my_body ) {
120 if( my_incoming )
121 my_left_sum->my_body.reverse_join( my_incoming->my_body );
123 sum_node& c = *this;
126 set_ref_count( (a!=NULL)+(b!=NULL) );
127 my_body = NULL;
// If both children exist, spawn the right one and bypass-return the left;
// otherwise run whichever child exists.
128 if( a ) spawn(*b);
129 else a = b;
130 return a;
131 } else {
132 return NULL;
133 }
134 }
135 template<typename Range_,typename Body_,typename Partitioner_>
136 friend class start_scan;
137
138 template<typename Range_,typename Body_>
139 friend class finish_scan;
140 };
141
143
//! Combines partial results (per the symbol index).
// NOTE(review): listing lines 146-155, 157, 160-162, 166 and 179 are collapsed
// in this dump. Per the symbol index they declare the typedefs and the members
// 'final_sum_type **const my_sum', 'sum_node_type*& my_return_slot',
// 'final_sum_type* my_right_zombie' and 'sum_node_type& my_result', plus the
// execute() signature and the branches around the visible statements — confirm
// against the real header.
144 template<typename Range, typename Body>
145 class finish_scan: public task {
150 public:
153
// execute() fragment: publish/merge results, destroying my_result when it is
// not needed, and releasing the zombie right-child body if one was created.
156 if( my_result.my_left )
158 if( my_right_zombie && my_sum )
159 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
163 } else {
164 destroy( my_result );
165 }
167 destroy(*my_right_zombie);
168 my_right_zombie = NULL;
169 }
170 return NULL;
171 }
172
// Wires this continuation to the caller's return slot, sum destination and
// the sum_node whose children it will join. my_right_zombie starts NULL and
// is only set if the right child gets (virtually) stolen.
173 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
174 my_sum(sum_),
175 my_return_slot(return_slot_),
176 my_right_zombie(NULL),
177 my_result(result_)
178 {
180 }
181 };
182
184
//! Initial task to split the work (per the symbol index).
// NOTE(review): listing lines 187-196, 199, 209, 230, 238 and 243 are collapsed
// in this dump. Per the symbol index they declare the typedefs and members
// (my_body, my_sum, my_return_slot, my_parent_sum, my_is_final,
// my_is_right_child), the splitting init of my_partition, the declaration of
// 'root', and the spawn_root_and_wait calls — confirm against the real header.
185 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
186 class start_scan: public task {
197 Range my_range;
198 typename Partitioner::partition_type my_partition;
200 public:
// Splitting constructor: takes the right half of parent_'s range and inherits
// its body/sum/finality; my_is_right_child is set by the caller afterwards.
201 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
202 my_body(parent_.my_body),
203 my_sum(parent_.my_sum),
204 my_return_slot(&return_slot_),
205 my_parent_sum(parent_sum_),
206 my_is_final(parent_.my_is_final),
207 my_is_right_child(false),
208 my_range(parent_.my_range,split()),
210 {
211 __TBB_ASSERT( !*my_return_slot, NULL );
212 }
213
// Root constructor for the whole scan: final from the start, no parent sum.
214 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
215 my_body(&body_),
216 my_sum(NULL),
217 my_return_slot(&return_slot_),
218 my_parent_sum(NULL),
219 my_is_final(true),
220 my_is_right_child(false),
221 my_range(range_),
222 my_partition(partitioner_)
223 {
224 __TBB_ASSERT( !*my_return_slot, NULL );
225 }
226
// Entry point: runs pass 1 (and, when splitting occurred, the second pass via
// 'root'); on the serial fast path the temp body's result is assigned back to
// body_ directly and the temp task is destroyed.
227 static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
228 if( !range_.empty() ) {
229 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
231 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
232 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
233 /*my_return_slot=*/root,
234 range_,
235 *temp_body,
236 partitioner_ );
237 temp_body->my_body.reverse_join(body_);
// Spawn/wait of pass1 (line 238) is collapsed; 'root' is non-NULL only if the
// range was actually split during pass 1.
239 if( root ) {
240 root->my_body = temp_body;
241 root->my_incoming = NULL;
242 root->my_stuff_last = &body_;
244 } else {
// Serial case: no split happened, so the pre-scan body already holds the
// final state; copy it out and dispose of the helper task.
245 body_.assign(temp_body->my_body);
246 temp_body->finish_construction( range_, NULL );
247 temp_body->destroy(*temp_body);
248 }
249 }
250 }
251 };
252
// Out-of-line definition of start_scan::execute().
// NOTE(review): the signature line (parallel_scan.h:254,
// 'task* start_scan<Range,Body,Partitioner>::execute()') is collapsed in this dump.
253 template<typename Range, typename Body, typename Partitioner>
255 typedef internal::finish_scan<Range,Body> finish_pass1_type;
256 finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
257 // Inspecting p->my_result.my_left_sum would ordinarily be a race condition.
258 // But we inspect it only if we are not a stolen task, in which case we
259 // know that the task assigning to p->my_result.my_left_sum has completed.
260 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
261 if( treat_as_stolen ) {
262 // Invocation is for right child that has been really stolen or needs to be virtually stolen
// A stolen right child must not share the left sibling's body: give it a
// fresh copy (recorded as the zombie so finish_scan can destroy it) and
// demote it to a pre-scan.
263 p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
264 my_is_final = false;
265 }
266 task* next_task = NULL;
// Leaf case: run the range directly with the appropriate tag; otherwise split.
267 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
268 if( my_is_final )
269 (my_body->my_body)( my_range, final_scan_tag() );
270 else if( my_sum )
271 (my_body->my_body)( my_range, pre_scan_tag() );
272 if( my_sum )
273 *my_sum = my_body;
274 __TBB_ASSERT( !*my_return_slot, NULL );
275 } else {
276 sum_node_type* result;
277 if( my_parent_sum )
278 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
279 else
280 result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
281 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
282 // Split off right child
283 start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
284 b.my_is_right_child = true;
285 // Left child is recycling of *this. Must recycle this before spawning b,
286 // otherwise b might complete and decrement c.ref_count() to zero, which
287 // would cause c.execute() to run prematurely.
288 recycle_as_child_of(c);
289 c.set_ref_count(2);
290 c.spawn(b);
// *this continues as the left child: redirect its sum/return slots into the
// freshly created sum_node and bypass-return it to the scheduler.
291 my_sum = &result->my_left_sum;
292 my_return_slot = &result->my_left;
293 my_is_right_child = false;
294 next_task = this;
295 my_parent_sum = result;
296 __TBB_ASSERT( !*my_return_slot, NULL );
297 }
298 return next_task;
299 }
300
// Adaptor that turns a (scan, reverse_join) pair of callables plus an identity
// value into a Body for parallel_scan (used by the functional-form overloads).
// NOTE(review): listing lines 302, 314-316, 318-319, 325-326 and 329 are
// collapsed in this dump. Per the symbol index they hold the class declaration,
// the splitting constructor 'lambda_scan_body(lambda_scan_body&, split)', and
// the signatures of 'reverse_join(lambda_scan_body&)' and
// 'assign(lambda_scan_body&)' — confirm against the real header.
301 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
303 Value my_sum;
304 const Value& identity_element;
305 const Scan& my_scan;
306 const ReverseJoin& my_reverse_join;
307 public:
// Initial body: running sum starts at the identity element.
308 lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
309 : my_sum(identity)
310 , identity_element(identity)
311 , my_scan(scan)
312 , my_reverse_join(rev_join) {}
313
// Splitting-constructor fragment (surrounding lines collapsed).
317 , my_scan(b.my_scan)
319
// Apply the user scan over r; Tag selects pre-scan vs final-scan behavior
// inside the user-supplied callable.
320 template<typename Tag>
321 void operator()( const Range& r, Tag tag ) {
322 my_sum = my_scan(r, my_sum, tag);
323 }
324
// reverse_join body fragment (signature line 325 collapsed).
327 }
328
// assign body (signature line 329 collapsed): adopt b's accumulated sum.
330 my_sum = b.my_sum;
331 }
332
// Final accumulated value, returned by the functional parallel_scan overloads.
333 Value result() const {
334 return my_sum;
335 }
336 };
337} // namespace internal
339
340// Requirements on Range concept are documented in blocked_range.h
341
359
361
//! Parallel prefix with default partitioner (per the symbol index).
// NOTE(review): the body line (parallel_scan.h:364, presumably the
// internal::start_scan<...>::run call with __TBB_DEFAULT_PARTITIONER) is
// collapsed in this dump — confirm against the real header.
362 template<typename Range, typename Body>
363 void parallel_scan( const Range& range, Body& body ) {
365}
366
368
//! Parallel prefix with simple_partitioner.
// NOTE(review): the body line (parallel_scan.h:371, presumably the
// internal::start_scan<...>::run call) is collapsed in this dump.
369 template<typename Range, typename Body>
370 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
372}
373
375
//! Parallel prefix with auto_partitioner.
// NOTE(review): the body line (parallel_scan.h:378, presumably the
// internal::start_scan<...>::run call) is collapsed in this dump.
376 template<typename Range, typename Body>
377 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
379}
380
382
//! Functional-form parallel prefix: wraps (identity, scan, reverse_join) in a
//! lambda_scan_body, runs the scan, and returns the accumulated value.
// NOTE(review): line 386 (presumably tbb::parallel_scan(range,body,...) with the
// default partitioner) is collapsed in this dump — confirm against the real header.
383 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
384 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
385 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
387 return body.result();
388}
389
391
//! Functional-form parallel prefix with simple_partitioner: adapts the three
//! callables into a lambda_scan_body, delegates to the Body overload, and
//! returns the final accumulated value.
392 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
393 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
394 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
395 tbb::parallel_scan(range,body,partitioner);
396 return body.result();
397}
398
400
//! Functional-form parallel prefix with auto_partitioner: adapts the three
//! callables into a lambda_scan_body, delegates to the Body overload, and
//! returns the final accumulated value.
401 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
402 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
403 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
404 tbb::parallel_scan(range,body,partitioner);
405 return body.result();
406}
407
409
410} // namespace tbb
411
413#undef __TBB_parallel_scan_H_include_area
414
415#endif /* __TBB_parallel_scan_H */
416
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:596
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
#define __TBB_override
Definition: tbb_stddef.h:240
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
void const char const char int ITT_FORMAT __itt_group_sync p
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
The graph class.
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:305
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:32
static bool is_final_scan()
Definition: parallel_scan.h:33
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:39
static bool is_final_scan()
Definition: parallel_scan.h:40
Performs final scan for a leaf.
Definition: parallel_scan.h:50
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:71
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:56
aligned_space< Range > my_range
Definition: parallel_scan.h:54
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:66
Split work to be done in the scan.
Definition: parallel_scan.h:82
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:83
final_sum_type * my_body
Definition: parallel_scan.h:86
task * execute() __TBB_override
Should be overridden by derived classes.
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:94
final_sum_type * my_incoming
Definition: parallel_scan.h:85
final_sum_type * my_left_sum
Definition: parallel_scan.h:89
Combine partial results.
sum_node< Range, Body > sum_node_type
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
final_sum< Range, Body > final_sum_type
final_sum_type * my_right_zombie
sum_node_type *& my_return_slot
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum_type **const my_sum
Initial task to split the work.
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
final_sum_type ** my_sum
sum_node_type * my_parent_sum
sum_node_type ** my_return_slot
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
sum_node< Range, Body > sum_node_type
Partitioner::partition_type my_partition
final_sum_type * my_body
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum< Range, Body > final_sum_type
void reverse_join(lambda_scan_body &a)
void assign(lambda_scan_body &b)
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
void operator()(const Range &r, Tag tag)
const ReverseJoin & my_reverse_join
lambda_scan_body(lambda_scan_body &b, split)
A simple partitioner.
Definition: partitioner.h:586
An auto partitioner.
Definition: partitioner.h:613
Base class for user-defined tasks.
Definition: task.h:615
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:725
void recycle_as_continuation()
Change this to be a continuation of its former self.
Definition: task.h:711
int ref_count() const
The internal reference count.
Definition: task.h:915
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:663
void set_ref_count(int count)
Set reference count.
Definition: task.h:761
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:808
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:416

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.