Intel(R) Threading Building Blocks Doxygen Documentation version 4.2.3
Loading...
Searching...
No Matches
tbb/parallel_for.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_for_H
18#define __TBB_parallel_for_H
19
20#define __TBB_parallel_for_H_include_area
22
23#include <new>
24#include "task.h"
25#include "partitioner.h"
26#include "blocked_range.h"
27#include "tbb_exception.h"
29
30namespace tbb {
31
32namespace interface9 {
34namespace internal {
35
37 void* allocate_sibling(task* start_for_task, size_t bytes);
38
40
// Task type used in parallel_for.
// Holds the range to process, a const copy of the body, and the partitioner's per-task state.
// NOTE(review): this is an extracted source listing; its embedded line numbers skip in several
// places inside this class (45->47->50, 76->78, 94->96, 105->107), so some declarations and
// statements of the real class are not visible here. Confirm against the real header.
41 template<typename Range, typename Body, typename Partitioner>
42 class start_for: public task {
43 Range my_range;
44 const Body my_body;
45 typename Partitioner::task_partition_type my_partition;
47
// NOTE(review): the opening line of this member function is not shown. The declaration index at
// the end of this page lists `void note_affinity(affinity_id id) __TBB_override`
// ("Update affinity info, if any."), which presumably is what this body belongs to.
50 my_partition.note_affinity( id );
51 }
52
53 public:
// Constructor for root task.
// Copies the range and body, initialises the per-task partition state from `partitioner`,
// and calls fgt_algorithm with this task and a NULL third argument.
55 start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
56 my_range(range),
57 my_body(body),
58 my_partition(partitioner)
59 {
60 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
61 }
63
// Splitting constructor used to generate children.
// The range and the partition state are both constructed from the parent's members together
// with `split_obj`; the body is copied from the parent.
64 start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
65 my_range(parent_.my_range, split_obj),
66 my_body(parent_.my_body),
67 my_partition(parent_.my_partition, split_obj)
68 {
69 my_partition.set_affinity(*this);
70 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
71 }
73
// Construct right child from the given range as response to the demand.
// NOTE(review): listing line 77 is absent, so the initializer that follows `my_body(...)`
// (presumably the one for my_partition) is not visible here.
74 start_for( start_for& parent_, const Range& r, depth_t d ) :
75 my_range(r),
76 my_body(parent_.my_body),
78 {
79 my_partition.set_affinity(*this);
80 my_partition.align_depth( d );
81 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
82 }
// Entry point without a caller-supplied context: does nothing for an empty range, otherwise
// allocates a root start_for task for (range, body, partitioner).
83 static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
84 if( !range.empty() ) {
85#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
86 start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
87#else
88 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
89 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
90 task_group_context context(PARALLEL_FOR);
91 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
92#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
93 // REGION BEGIN
// NOTE(review): a local `context` is declared only in the #else branch above, yet it is
// referenced below in both configurations — confirm how the first branch resolves it.
94 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
// NOTE(review): listing line 95 is absent here; the statement that actually runs task `a`
// is not visible in this extraction.
96 fgt_end_algorithm( (void*)&context );
97 // REGION END
98 }
99 }
100#if __TBB_TASK_GROUP_CONTEXT
// Entry point with a caller-supplied task_group_context: does nothing for an empty range,
// otherwise allocates the root start_for task in `context`.
101 static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
102 if( !range.empty() ) {
103 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
104 // REGION BEGIN
105 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
// NOTE(review): listing line 106 is absent here; the statement that actually runs task `a`
// is not visible in this extraction.
107 fgt_end_algorithm( (void*)&context );
108 // REGION END
109 }
110 }
111#endif /* __TBB_TASK_GROUP_CONTEXT */
// Run body for range, serves as callback for partitioner.
// The const_casts only strip constness from the address of my_body so that it can be passed
// to the fgt_* hooks as void*; the body itself is invoked through the const member.
113 void run_body( Range &r ) {
114 fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
115 my_body( r );
116 fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
117 }
118
// Spawn right task, serves as callback for partitioner.
// Placement-constructs a start_for via the splitting constructor in storage obtained from
// allocate_sibling, then spawns it.
120 void offer_work(typename Partitioner::split_type& split_obj) {
121 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
122 }
// Spawn right task, serves as callback for partitioner.
// Same as above, but the new task is built for the explicit range `r` at depth `d`.
124 void offer_work(const Range& r, depth_t d = 0) {
125 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
126 }
127 };
128
// Allocate right task with new parent.
// Creates a flag_task as the continuation of `start_for_task`, installs it as that task's
// parent, sets its reference count to 2, and returns storage of `bytes` bytes allocated as a
// child of the new parent. The caller placement-constructs the sibling task in that storage.
130 // TODO: 'inline' is here to avoid a multiple-definition error, but for the sake of code size this should not be inlined
131 inline void* allocate_sibling(task* start_for_task, size_t bytes) {
132 task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
133 start_for_task->set_parent(parent_ptr);
// presumably one reference for start_for_task and one for the child allocated below — confirm
134 parent_ptr->set_ref_count(2);
135 return &parent_ptr->allocate_child().allocate(bytes);
136 }
137
// Execute task for parallel_for.
// Lets the partition state check whether this task is being stolen, then delegates the work
// on my_range to the partition's execute(); always returns NULL.
// NOTE(review): listing line 140 (the function's signature) is absent from this extraction;
// the declaration index on this page lists it as `task * execute() __TBB_override`.
139 template<typename Range, typename Body, typename Partitioner>
141 my_partition.check_being_stolen( *this );
142 my_partition.execute(*this, my_range);
143 return NULL;
144 }
145} // namespace internal
147} // namespace interface9
148
150namespace internal {
152
// Calls the function with values from range [begin, end) with a step provided.
// NOTE(review): listing line 155 (the class header, presumably `class parallel_for_body ...`)
// is absent from this extraction.
154 template<typename Function, typename Index>
// my_func is held by reference, not copied; my_begin and my_step are stored by value.
156 const Function &my_func;
157 const Index my_begin;
158 const Index my_step;
159 public:
160 parallel_for_body( const Function& _func, Index& _begin, Index& _step )
161 : my_func(_func), my_begin(_begin), my_step(_step) {}
162
// For each position i in r, calls my_func(my_begin + i*my_step). The value is kept in k and
// advanced by my_step per iteration instead of being recomputed.
163 void operator()( const tbb::blocked_range<Index>& r ) const {
164 // A set of local variables to help the compiler with vectorization of the following loop.
165 Index b = r.begin();
166 Index e = r.end();
167 Index ms = my_step;
168 Index k = my_begin + b*ms;
169
170#if __INTEL_COMPILER
171#pragma ivdep
172#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
173#pragma vector always assert
174#endif
175#endif
176 for ( Index i = b; i < e; ++i, k += ms ) {
177 my_func( k );
178 }
179 }
180 };
181} // namespace internal
183
184// Requirements on Range concept are documented in blocked_range.h
185
196
198
// Parallel iteration over range with default partitioner.
// NOTE(review): the function body (listing line 201) is absent from this extraction.
199template<typename Range, typename Body>
200void parallel_for( const Range& range, const Body& body ) {
202}
203
205
// Parallel iteration over range with a caller-supplied simple_partitioner.
// NOTE(review): the function body (listing line 208) is absent from this extraction.
206template<typename Range, typename Body>
207void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
209}
210
212
// Parallel iteration over range with a caller-supplied auto_partitioner.
// NOTE(review): the function body (listing line 215) is absent from this extraction.
213template<typename Range, typename Body>
214void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
216}
217
219
// Parallel iteration over range with a caller-supplied static_partitioner.
// NOTE(review): the function body (listing line 222) is absent from this extraction.
220template<typename Range, typename Body>
221void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
223}
224
226
// Parallel iteration over range with a caller-supplied affinity_partitioner.
// Unlike the other partitioner overloads, the partitioner is taken by non-const reference.
// NOTE(review): the function body (listing line 229) is absent from this extraction.
227template<typename Range, typename Body>
228void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
230}
231
232#if __TBB_TASK_GROUP_CONTEXT
234
// Parallel iteration over range with a caller-supplied task_group_context and no explicit
// partitioner argument.
// NOTE(review): the function body (listing line 237) is absent from this extraction.
235template<typename Range, typename Body>
236void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
238}
239
241
// Parallel iteration over range with a caller-supplied simple_partitioner and
// task_group_context. Forwards all arguments to start_for<...>::run.
242template<typename Range, typename Body>
243void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
244 internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
245}
246
248
// Parallel iteration over range with a caller-supplied auto_partitioner and
// task_group_context. Forwards all arguments to start_for<...>::run.
249template<typename Range, typename Body>
250void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
251 internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
252}
253
255
// Parallel iteration over range with a caller-supplied static_partitioner and
// task_group_context. Forwards all arguments to start_for<...>::run.
256template<typename Range, typename Body>
257void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
258 internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
259}
260
262
// Parallel iteration over range with a caller-supplied affinity_partitioner and
// task_group_context. The partitioner is taken by non-const reference and start_for is
// instantiated with the non-const partitioner type.
263template<typename Range, typename Body>
264void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
265 internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
266}
267#endif /* __TBB_TASK_GROUP_CONTEXT */
269
270namespace strict_ppl {
271
273
// Implementation of parallel iteration over stepped range of integers with explicit step and
// partitioner.
// - step <= 0: reports the error through internal::throw_exception(eid_nonpositive_step).
// - last <= first: does nothing.
// - otherwise: runs tbb::parallel_for over blocked_range [0, end), where `end` is the number of
//   steps needed to cover [first, last) (for integral Index this is the rounded-up quotient
//   (last - first) / step).
// NOTE(review): `body` used below is not declared in the visible lines; listing line 282,
// which presumably constructs it from f, first and step, is absent from this extraction.
274template <typename Index, typename Function, typename Partitioner>
275void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
276 if (step <= 0 )
277 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
278 else if (last > first) {
279 // Above "else" avoids "potential divide by zero" warning on some platforms
280 Index end = (last - first - Index(1)) / step + Index(1);
281 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
283 tbb::parallel_for(range, body, partitioner);
284 }
285}
286
// Parallel iteration over a range of integers with a step provided and default partitioner.
// The default is a temporary auto_partitioner handed to parallel_for_impl.
288template <typename Index, typename Function>
289void parallel_for(Index first, Index last, Index step, const Function& f) {
290 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
291}
// Parallel iteration over a range of integers with a step provided and a caller-supplied
// simple_partitioner.
293template <typename Index, typename Function>
294void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
295 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
296}
// Parallel iteration over a range of integers with a step provided and a caller-supplied
// auto_partitioner.
298template <typename Index, typename Function>
299void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
300 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
301}
// Parallel iteration over a range of integers with a step provided and a caller-supplied
// static_partitioner.
303template <typename Index, typename Function>
304void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
305 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
306}
// Parallel iteration over a range of integers with a step provided and a caller-supplied
// affinity_partitioner (taken by non-const reference; template arguments are deduced).
308template <typename Index, typename Function>
309void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
310 parallel_for_impl(first, last, step, f, partitioner);
311}
312
// Parallel iteration over a range of integers with a step of 1 and default partitioner
// (a temporary auto_partitioner).
314template <typename Index, typename Function>
315void parallel_for(Index first, Index last, const Function& f) {
316 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
317}
// Parallel iteration over a range of integers with a step of 1 and a caller-supplied
// simple_partitioner.
319template <typename Index, typename Function>
320void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
321 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
322}
// Parallel iteration over a range of integers with a step of 1 and a caller-supplied
// auto_partitioner.
324template <typename Index, typename Function>
325void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
326 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
327}
// Parallel iteration over a range of integers with a step of 1 and a caller-supplied
// static_partitioner.
329template <typename Index, typename Function>
330void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
331 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
332}
// Parallel iteration over a range of integers with a step of 1 and a caller-supplied
// affinity_partitioner (taken by non-const reference; template arguments are deduced).
334template <typename Index, typename Function>
335void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
336 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
337}
338
339#if __TBB_TASK_GROUP_CONTEXT
// Implementation of parallel iteration over a stepped range of integers with explicit step,
// partitioner and task_group_context.
// - step <= 0: reports the error through internal::throw_exception(eid_nonpositive_step).
// - last <= first: does nothing.
// - otherwise: runs tbb::parallel_for over blocked_range [0, end) in `context`, where `end` is
//   the number of steps needed to cover [first, last).
// NOTE(review): `body` used below is not declared in the visible lines; listing line 349,
// which presumably constructs it from f, first and step, is absent from this extraction.
341template <typename Index, typename Function, typename Partitioner>
342void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
343 if (step <= 0 )
344 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
345 else if (last > first) {
346 // Above "else" avoids "potential divide by zero" warning on some platforms
347 Index end = (last - first - Index(1)) / step + Index(1);
348 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
350 tbb::parallel_for(range, body, partitioner, context);
351 }
352}
353
// Parallel iteration over a range of integers with explicit step, task group context, and
// default partitioner (a temporary auto_partitioner).
355template <typename Index, typename Function>
356void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
357 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
358}
// Parallel iteration over a range of integers with explicit step, task group context, and a
// caller-supplied simple_partitioner.
360 template <typename Index, typename Function>
361void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
362 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
363}
// Parallel iteration over a range of integers with explicit step, task group context, and a
// caller-supplied auto_partitioner.
365 template <typename Index, typename Function>
366void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
367 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
368}
// Parallel iteration over a range of integers with explicit step, task group context, and a
// caller-supplied static_partitioner.
370template <typename Index, typename Function>
371void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
372 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
373}
// Parallel iteration over a range of integers with explicit step, task group context, and a
// caller-supplied affinity_partitioner (non-const reference; template arguments are deduced).
375 template <typename Index, typename Function>
376void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
377 parallel_for_impl(first, last, step, f, partitioner, context);
378}
379
380
// Parallel iteration over a range of integers with a step of 1, task group context, and
// default partitioner (a temporary auto_partitioner).
382template <typename Index, typename Function>
383void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
384 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
385}
// Parallel iteration over a range of integers with a step of 1, task group context, and a
// caller-supplied simple_partitioner.
387 template <typename Index, typename Function>
388void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
389 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
390}
// Parallel iteration over a range of integers with a step of 1, task group context, and a
// caller-supplied auto_partitioner.
392 template <typename Index, typename Function>
393void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
394 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
395}
// Parallel iteration over a range of integers with a step of 1, task group context, and a
// caller-supplied static_partitioner.
397template <typename Index, typename Function>
398void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
399 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
400}
// Parallel iteration over a range of integers with a step of 1, task group context, and a
// caller-supplied affinity_partitioner (non-const reference; template arguments are deduced).
402 template <typename Index, typename Function>
403void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
404 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
405}
406
407#endif /* __TBB_TASK_GROUP_CONTEXT */
409
410} // namespace strict_ppl
411
413
414} // namespace tbb
415
416#if TBB_PREVIEW_SERIAL_SUBSET
417#define __TBB_NORMAL_EXECUTION
418#include "../serial/tbb/parallel_for.h"
419#undef __TBB_NORMAL_EXECUTION
420#endif
421
423#undef __TBB_parallel_for_H_include_area
424
425#endif /* __TBB_parallel_for_H */
#define __TBB_DEFAULT_PARTITIONER
Definition tbb_config.h:596
#define __TBB_override
Definition tbb_stddef.h:240
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task * task
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void parallel_for(const Range &range, const Body &body)
Parallel iteration over range with default partitioner.
The graph class.
static void fgt_algorithm(string_index, void *, void *)
static void fgt_begin_algorithm(string_index, void *)
void throw_exception(exception_id eid)
Versionless convenience wrapper for throw_exception_v4()
static void fgt_end_algorithm(void *)
static void fgt_alg_begin_body(string_index, void *, void *)
auto last(Container &c) -> decltype(begin(c))
auto first(Container &c) -> decltype(begin(c))
static void fgt_alg_end_body(void *)
void parallel_for(Index first, Index last, Index step, const Function &f)
Parallel iteration over a range of integers with a step provided and default partitioner.
void parallel_for_impl(Index first, Index last, Index step, const Function &f, Partitioner &partitioner)
Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
void * allocate_sibling(task *start_for_task, size_t bytes)
allocate right task with new parent
A range over which to iterate.
const_iterator begin() const
Beginning of range.
const_iterator end() const
One past last value in range.
Task type used in parallel_for.
void note_affinity(affinity_id id) __TBB_override
Update affinity info, if any.
void offer_work(typename Partitioner::split_type &split_obj)
spawn right task, serves as callback for partitioner
start_for(start_for &parent_, typename Partitioner::split_type &split_obj)
Splitting constructor used to generate children.
Partitioner::task_partition_type my_partition
static void run(const Range &range, const Body &body, Partitioner &partitioner, task_group_context &context)
start_for(start_for &parent_, const Range &r, depth_t d)
Construct right child from the given range as response to the demand.
static void run(const Range &range, const Body &body, Partitioner &partitioner)
task * execute() __TBB_override
execute task for parallel_for
void run_body(Range &r)
Run body for range, serves as callback for partitioner.
void offer_work(const Range &r, depth_t d=0)
spawn right task, serves as callback for partitioner
start_for(const Range &range, const Body &body, Partitioner &partitioner)
Constructor for root task.
Calls the function with values from range [begin, end) with a step provided.
void operator()(const tbb::blocked_range< Index > &r) const
parallel_for_body(const Function &_func, Index &_begin, Index &_step)
Join task node that contains shared flag for stealing feedback.
A simple partitioner.
An auto partitioner.
A static partitioner.
An affinity partitioner.
task &__TBB_EXPORTED_METHOD allocate(size_t size) const
Definition task.cpp:114
Used to form groups of tasks.
Definition task.h:358
Base class for user-defined tasks.
Definition task.h:615
virtual task * execute()=0
Should be overridden by derived classes.
internal::allocate_child_proxy & allocate_child()
Returns proxy for overloaded new that allocates a child task of *this.
Definition task.h:681
internal::allocate_continuation_proxy & allocate_continuation()
Returns proxy for overloaded new that allocates a continuation task of *this.
Definition task.h:676
void set_parent(task *p)
sets parent task pointer to specified value
Definition task.h:868
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition task.h:663
void set_ref_count(int count)
Set reference count.
Definition task.h:761
task_group_context * context()
This method is deprecated and will be removed in the future.
Definition task.h:878
internal::affinity_id affinity_id
An id as used for specifying affinity.
Definition task.h:940
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition task.h:808
Base class for types that should not be assigned.
Definition tbb_stddef.h:322
Dummy type that distinguishes splitting constructor from copy constructor.
Definition tbb_stddef.h:416

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.