LLVM OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_wait_release.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 /* ------------------------------------------------------------------------ */
27 /* ------------------------------------------------------------------------ */
28 
29 
30 /* forward declaration */
31 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34 
35 #ifdef OMP_45_ENABLED
36 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
37 #endif
38 
39 #ifdef BUILD_TIED_TASK_STACK
40 
41 //---------------------------------------------------------------------------
42 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
43 // from top to bottom
44 //
45 // gtid: global thread identifier for thread containing stack
46 // thread_data: thread data for task team thread containing stack
47 // threshold: value above which the trace statement triggers
48 // location: string identifying call site of this function (for trace)
49 
50 static void
51 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
52 {
53  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
54  kmp_taskdata_t **stack_top = task_stack -> ts_top;
55  kmp_int32 entries = task_stack -> ts_entries;
56  kmp_taskdata_t *tied_task;
57 
58  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
59  "first_block = %p, stack_top = %p \n",
60  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
61 
62  KMP_DEBUG_ASSERT( stack_top != NULL );
63  KMP_DEBUG_ASSERT( entries > 0 );
64 
65  while ( entries != 0 )
66  {
67  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
68  // fix up ts_top if we need to pop from previous block
69  if ( (entries & TASK_STACK_INDEX_MASK) == 0 )
70  {
71  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
72 
73  stack_block = stack_block -> sb_prev;
74  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
75  }
76 
77  // finish bookkeeping
78  stack_top--;
79  entries--;
80 
81  tied_task = * stack_top;
82 
83  KMP_DEBUG_ASSERT( tied_task != NULL );
84  KMP_DEBUG_ASSERT( tied_task -> td_flags.tiedness == TASK_TIED );
85 
86  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
87  "stack_top=%p, tied_task=%p\n",
88  location, gtid, entries, stack_top, tied_task ) );
89  }
90  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
91 
92  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
93  location, gtid ) );
94 }
95 
96 //---------------------------------------------------------------------------
97 // __kmp_init_task_stack: initialize the task stack for the first time
98 // after a thread_data structure is created.
99 // It should not be necessary to do this again (assuming the stack works).
100 //
101 // gtid: global thread identifier of calling thread
102 // thread_data: thread data for task team thread containing stack
103 
104 static void
105 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
106 {
107  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
108  kmp_stack_block_t *first_block;
109 
110  // set up the first block of the stack
111  first_block = & task_stack -> ts_first_block;
112  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
113  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
114 
115  // initialize the stack to be empty
116  task_stack -> ts_entries = TASK_STACK_EMPTY;
117  first_block -> sb_next = NULL;
118  first_block -> sb_prev = NULL;
119 }
120 
121 
122 //---------------------------------------------------------------------------
123 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
124 //
125 // gtid: global thread identifier for calling thread
126 // thread_data: thread info for thread containing stack
127 
128 static void
129 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
130 {
131  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
132  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
133 
134  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
135  // free from the second block of the stack
136  while ( stack_block != NULL ) {
137  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
138 
139  stack_block -> sb_next = NULL;
140  stack_block -> sb_prev = NULL;
141  if (stack_block != & task_stack -> ts_first_block) {
142  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
143  }
144  stack_block = next_block;
145  }
146  // initialize the stack to be empty
147  task_stack -> ts_entries = 0;
148  task_stack -> ts_top = NULL;
149 }
150 
151 
152 //---------------------------------------------------------------------------
153 // __kmp_push_task_stack: Push the tied task onto the task stack.
154 // Grow the stack if necessary by allocating another block.
155 //
156 // gtid: global thread identifier for calling thread
157 // thread: thread info for thread containing stack
158 // tied_task: the task to push on the stack
159 
160 static void
161 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
162 {
163  // GEH - need to consider what to do if tt_threads_data not allocated yet
164  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
165  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
166  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
167 
168  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
169  return; // Don't push anything on stack if team or team tasks are serialized
170  }
171 
172  KMP_DEBUG_ASSERT( tied_task -> td_flags.tiedness == TASK_TIED );
173  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
174 
175  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
176  gtid, thread, tied_task ) );
177  // Store entry
178  * (task_stack -> ts_top) = tied_task;
179 
180  // Do bookkeeping for next push
181  task_stack -> ts_top++;
182  task_stack -> ts_entries++;
183 
184  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
185  {
186  // Find beginning of this task block
187  kmp_stack_block_t *stack_block =
188  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
189 
190  // Check if we already have a block
191  if ( stack_block -> sb_next != NULL )
192  { // reset ts_top to beginning of next block
193  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
194  }
195  else
196  { // Alloc new block and link it up
197  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
198  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
199 
200  task_stack -> ts_top = & new_block -> sb_block[0];
201  stack_block -> sb_next = new_block;
202  new_block -> sb_prev = stack_block;
203  new_block -> sb_next = NULL;
204 
205  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
206  gtid, tied_task, new_block ) );
207  }
208  }
209  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
210 }
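/*
 * Editorial illustration (not part of the upstream sources): the suspended-tied-task
 * stack is a linked list of fixed-size blocks, and (entries & TASK_STACK_INDEX_MASK) == 0
 * detects a block boundary.  Assuming TASK_STACK_BLOCK_SIZE is 32 (so the mask is 31):
 *
 *     entries = 32  ->  32 & 31 == 0   crossed a block boundary; follow or allocate sb_next
 *     entries = 33  ->  33 & 31 == 1   still inside the current block
 *
 * A minimal standalone sketch of the same boundary test (names are illustrative only):
 *
 *     #include <stdio.h>
 *     #define BLOCK_SIZE 32                  // assumed power of two
 *     #define INDEX_MASK (BLOCK_SIZE - 1)
 *     int main(void) {
 *         for (int entries = 30; entries <= 34; ++entries)
 *             printf("%d -> %s\n", entries,
 *                    (entries & INDEX_MASK) == 0 ? "block boundary" : "inside block");
 *         return 0;
 *     }
 */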
211 
212 //---------------------------------------------------------------------------
213 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
214 // the task, just check to make sure it matches the ending task passed in.
215 //
216 // gtid: global thread identifier for the calling thread
217 // thread: thread info structure containing stack
218 // tied_task: the task popped off the stack
219 // ending_task: the task that is ending (should match popped task)
220 
221 static void
222 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
223 {
224  // GEH - need to consider what to do if tt_threads_data not allocated yet
225  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
226  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
227  kmp_taskdata_t *tied_task;
228 
229  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
230  return; // Don't pop anything from stack if team or team tasks are serialized
231  }
232 
233  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
234  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
235 
236  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
237 
238  // fix up ts_top if we need to pop from previous block
239  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
240  {
241  kmp_stack_block_t *stack_block =
242  (kmp_stack_block_t *) (task_stack -> ts_top) ;
243 
244  stack_block = stack_block -> sb_prev;
245  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
246  }
247 
248  // finish bookkeeping
249  task_stack -> ts_top--;
250  task_stack -> ts_entries--;
251 
252  tied_task = * (task_stack -> ts_top );
253 
254  KMP_DEBUG_ASSERT( tied_task != NULL );
255  KMP_DEBUG_ASSERT( tied_task -> td_flags.tiedness == TASK_TIED );
256  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
257 
258  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
259  return;
260 }
261 #endif /* BUILD_TIED_TASK_STACK */
262 
263 //---------------------------------------------------
264 // __kmp_push_task: Add a task to the thread's deque
265 
266 static kmp_int32
267 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
268 {
269  kmp_info_t * thread = __kmp_threads[ gtid ];
270  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
271  kmp_task_team_t * task_team = thread->th.th_task_team;
272  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
273  kmp_thread_data_t * thread_data;
274 
275  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
276 
277  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
278  // untied task needs to increment counter so that the task structure is not freed prematurely
279  kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
280  KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
281  gtid, counter, taskdata ) );
282  }
283 
284  // The first check avoids building task_team thread data if serialized
285  if ( taskdata->td_flags.task_serial ) {
286  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
287  gtid, taskdata ) );
288  return TASK_NOT_PUSHED;
289  }
290 
291  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
292  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
293  if ( ! KMP_TASKING_ENABLED(task_team) ) {
294  __kmp_enable_tasking( task_team, thread );
295  }
296  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
297  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
298 
299  // Find tasking deque specific to encountering thread
300  thread_data = & task_team -> tt.tt_threads_data[ tid ];
301 
302  // No lock needed since only owner can allocate
303  if (thread_data -> td.td_deque == NULL ) {
304  __kmp_alloc_task_deque( thread, thread_data );
305  }
306 
307  // Check if deque is full
308  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
309  {
310  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
311  gtid, taskdata ) );
312  return TASK_NOT_PUSHED;
313  }
314 
315  // Lock the deque for the task push operation
316  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
317 
318 #if OMP_45_ENABLED
319  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
320  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
321  {
322  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
323  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
324  gtid, taskdata ) );
325  return TASK_NOT_PUSHED;
326  }
327 #else
328  // Must have room since only the calling thread can add tasks to this deque
329  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
330 #endif
331 
332  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
333  // Wrap index.
334  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
335  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
336 
337  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
338 
339  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
340  "task=%p ntasks=%d head=%u tail=%u\n",
341  gtid, taskdata, thread_data->td.td_deque_ntasks,
342  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
343 
344  return TASK_SUCCESSFULLY_PUSHED;
345 }
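/*
 * Editorial note (not part of the upstream sources): td_deque is used as a circular
 * buffer whose capacity is a power of two, so the tail index wraps with a bit mask
 * instead of a modulo.  A minimal sketch of the wrap, assuming a deque size of 256
 * (mask 255):
 *
 *     unsigned tail = 255;
 *     tail = (tail + 1) & 255;   // wraps to 0 instead of running past the end
 *
 * The ntasks counter is updated separately, under td_deque_lock above, so that other
 * threads stealing from this deque see a consistent head/tail/count.
 */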
346 
347 
348 //-----------------------------------------------------------------------------------------
349 // __kmp_pop_current_task_from_thread: restore the calling thread's current task to its parent when the team ends
350 // this_thr: thread structure to set current_task in.
351 
352 void
353 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
354 {
355  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
356  "curtask_parent=%p\n",
357  0, this_thr, this_thr -> th.th_current_task,
358  this_thr -> th.th_current_task -> td_parent ) );
359 
360  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
361 
362  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
363  "curtask_parent=%p\n",
364  0, this_thr, this_thr -> th.th_current_task,
365  this_thr -> th.th_current_task -> td_parent ) );
366 }
367 
368 
369 //---------------------------------------------------------------------------------------
370 // __kmp_push_current_task_to_thread: set up the current task in the given thread for a new team
371 // this_thr: thread structure to set up
372 // team: team for implicit task data
373 // tid: thread within team to set up
374 
375 void
376 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
377 {
378  // the thread's current task becomes the parent of the newly created implicit tasks of the new team
379  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
380  "parent_task=%p\n",
381  tid, this_thr, this_thr->th.th_current_task,
382  team->t.t_implicit_task_taskdata[tid].td_parent ) );
383 
384  KMP_DEBUG_ASSERT (this_thr != NULL);
385 
386  if( tid == 0 ) {
387  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
388  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
389  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
390  }
391  } else {
392  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
393  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
394  }
395 
396  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
397  "parent_task=%p\n",
398  tid, this_thr, this_thr->th.th_current_task,
399  team->t.t_implicit_task_taskdata[tid].td_parent ) );
400 }
401 
402 
403 //----------------------------------------------------------------------
404 // __kmp_task_start: bookkeeping for a task starting execution
405 // GTID: global thread id of calling thread
406 // task: task starting execution
407 // current_task: task suspending
408 
409 static void
410 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
411 {
412  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
413  kmp_info_t * thread = __kmp_threads[ gtid ];
414 
415  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
416  gtid, taskdata, current_task) );
417 
418  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
419 
420  // mark currently executing task as suspended
421  // TODO: GEH - make sure root team implicit task is initialized properly.
422  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
423  current_task -> td_flags.executing = 0;
424 
425  // Add task to stack if tied
426 #ifdef BUILD_TIED_TASK_STACK
427  if ( taskdata -> td_flags.tiedness == TASK_TIED )
428  {
429  __kmp_push_task_stack( gtid, thread, taskdata );
430  }
431 #endif /* BUILD_TIED_TASK_STACK */
432 
433  // mark starting task as executing and as current task
434  thread -> th.th_current_task = taskdata;
435 
436  KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
437  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
438  taskdata -> td_flags.started = 1;
439  taskdata -> td_flags.executing = 1;
440  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
441  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
442 
443  // GEH TODO: shouldn't we pass some sort of location identifier here?
444  // APT: yes, we will pass location here.
445  // need to store current thread state (in a thread or taskdata structure)
446  // before setting work_state, otherwise wrong state is set after end of task
447 
448  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
449  gtid, taskdata ) );
450 
451 #if OMPT_SUPPORT
452  if (ompt_enabled &&
453  ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
454  kmp_taskdata_t *parent = taskdata->td_parent;
455  ompt_callbacks.ompt_callback(ompt_event_task_begin)(
456  parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
457  parent ? &(parent->ompt_task_info.frame) : NULL,
458  taskdata->ompt_task_info.task_id,
459  taskdata->ompt_task_info.function);
460  }
461 #endif
462 #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
463  /* OMPT emit all dependences if requested by the tool */
464  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
465  ompt_callbacks.ompt_callback(ompt_event_task_dependences))
466  {
467  ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
468  taskdata->ompt_task_info.task_id,
469  taskdata->ompt_task_info.deps,
470  taskdata->ompt_task_info.ndeps
471  );
472  /* We can now free the allocated memory for the dependencies */
473  KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
474  taskdata->ompt_task_info.deps = NULL;
475  taskdata->ompt_task_info.ndeps = 0;
476  }
477 #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
478 
479  return;
480 }
481 
482 
483 //----------------------------------------------------------------------
484 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
485 // loc_ref: source location information; points to beginning of task block.
486 // gtid: global thread number.
487 // task: task thunk for the started task.
488 
489 void
490 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
491 {
492  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
493  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
494 
495  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
496  gtid, loc_ref, taskdata, current_task ) );
497 
498  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
499  // untied task needs to increment counter so that the task structure is not freed prematurely
500  kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
501  KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
502  gtid, counter, taskdata ) );
503  }
504 
505  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
506  __kmp_task_start( gtid, task, current_task );
507 
508  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
509  gtid, loc_ref, taskdata ) );
510 
511  return;
512 }
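/*
 * Editorial example (not part of the upstream sources): __kmpc_omp_task_begin_if0 and
 * its counterpart __kmpc_omp_task_complete_if0 bracket the inline execution of an
 * undeferred task.  Roughly, for user code such as
 *
 *     #pragma omp task if(0) shared(x)
 *     x = compute();
 *
 * a compiler typically allocates the task with __kmpc_omp_task_alloc and then, because
 * the if clause is false, calls __kmpc_omp_task_begin_if0, invokes the task entry
 * directly on the encountering thread, and finally calls __kmpc_omp_task_complete_if0,
 * instead of queueing the task with __kmpc_omp_task.
 */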
513 
514 #ifdef TASK_UNUSED
515 //----------------------------------------------------------------------
516 // __kmpc_omp_task_begin: report that a given task has started execution
517 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
518 
519 void
520 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
521 {
522  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
523 
524  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
525  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
526 
527  __kmp_task_start( gtid, task, current_task );
528 
529  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
530  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
531 
532  return;
533 }
534 #endif // TASK_UNUSED
535 
536 
537 //-------------------------------------------------------------------------------------
538 // __kmp_free_task: free the current task space and the space for shareds
539 // gtid: Global thread ID of calling thread
540 // taskdata: task to free
541 // thread: thread data structure of caller
542 
543 static void
544 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
545 {
546  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
547  gtid, taskdata) );
548 
549  // Check to make sure all flags and counters have the correct values
550  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
551  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
552  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
553  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
554  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
555  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
556 
557  taskdata->td_flags.freed = 1;
558  // deallocate the taskdata and shared variable blocks associated with this task
559  #if USE_FAST_MEMORY
560  __kmp_fast_free( thread, taskdata );
561  #else /* ! USE_FAST_MEMORY */
562  __kmp_thread_free( thread, taskdata );
563  #endif
564 
565  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
566  gtid, taskdata) );
567 }
568 
569 //-------------------------------------------------------------------------------------
570 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
571 //
572 // gtid: Global thread ID of calling thread
573 // taskdata: task to free
574 // thread: thread data structure of caller
575 
576 static void
577 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
578 {
579  kmp_int32 children = 0;
580  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
581 
582  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
583 
584  if ( !team_or_tasking_serialized ) {
585  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
586  KMP_DEBUG_ASSERT( children >= 0 );
587  }
588 
589  // Now, go up the ancestor tree to see if any ancestors can now be freed.
590  while ( children == 0 )
591  {
592  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
593 
594  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
595  "and freeing itself\n", gtid, taskdata) );
596 
597  // --- Deallocate my ancestor task ---
598  __kmp_free_task( gtid, taskdata, thread );
599 
600  taskdata = parent_taskdata;
601 
602  // Stop checking ancestors when we reach an implicit task or when tasking is serialized,
603  // rather than walking further up the ancestor tree, to avoid premature deallocation of ancestors.
604  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
605  return;
606 
607  if ( !team_or_tasking_serialized ) {
608  // Predecrement simulated by "- 1" calculation
609  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
610  KMP_DEBUG_ASSERT( children >= 0 );
611  }
612  }
613 
614  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
615  "not freeing it yet\n", gtid, taskdata, children) );
616 }
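/*
 * Editorial note (not part of the upstream sources) on the counting scheme used above:
 * td_allocated_child_tasks starts at 1 for the task itself (see __kmp_task_alloc below)
 * and is incremented once per allocated explicit child, so a descriptor can be freed
 * only after the task has finished and all of its children have been freed, i.e. when
 * its own count reaches zero.  Walking up td_parent then frees any ancestors whose
 * counts also drop to zero.
 */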
617 
618 //---------------------------------------------------------------------
619 // __kmp_task_finish: bookkeeping to do when a task finishes execution
620 // gtid: global thread ID for calling thread
621 // task: task to be finished
622 // resumed_task: task to be resumed. (may be NULL if task is serialized)
623 
624 static void
625 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
626 {
627  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
628  kmp_info_t * thread = __kmp_threads[ gtid ];
629  kmp_int32 children = 0;
630 
631 #if OMPT_SUPPORT
632  if (ompt_enabled &&
633  ompt_callbacks.ompt_callback(ompt_event_task_end)) {
635  ompt_callbacks.ompt_callback(ompt_event_task_end)(
636  taskdata->ompt_task_info.task_id);
637  }
638 #endif
639 
640  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
641  gtid, taskdata, resumed_task) );
642 
643  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
644 
645  // Pop task from stack if tied
646 #ifdef BUILD_TIED_TASK_STACK
647  if ( taskdata -> td_flags.tiedness == TASK_TIED )
648  {
649  __kmp_pop_task_stack( gtid, thread, taskdata );
650  }
651 #endif /* BUILD_TIED_TASK_STACK */
652 
653  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
654  // untied task needs to check the counter so that the task structure is not freed prematurely
655  kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
656  KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
657  gtid, counter, taskdata ) );
658  if ( counter > 0 ) {
659  // untied task is not done, to be continued possibly by other thread, do not free it now
660  if (resumed_task == NULL) {
661  KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
662  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
663  }
664  thread->th.th_current_task = resumed_task; // restore current_task
665  resumed_task->td_flags.executing = 1; // resume previous task
666  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
667  gtid, taskdata, resumed_task) );
668  return;
669  }
670  }
671 
672  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
673  taskdata -> td_flags.complete = 1; // mark the task as completed
674  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
675  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
676 
677  // Only need to keep track of count if team parallel and tasking not serialized
678  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
679  // Predecrement simulated by "- 1" calculation
680  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
681  KMP_DEBUG_ASSERT( children >= 0 );
682 #if OMP_40_ENABLED
683  if ( taskdata->td_taskgroup )
684  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
685  __kmp_release_deps(gtid,taskdata);
686 #endif
687  }
688 
689  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
690  // Otherwise, if a task is executed immediately from the release_deps code
691  // the flag will be reset to 1 again by this same function
692  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
693  taskdata -> td_flags.executing = 0; // suspend the finishing task
694 
695  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
696  gtid, taskdata, children) );
697 
698 #if OMP_40_ENABLED
699  /* If the task's destructor thunk flag has been set, we need to invoke the
700  destructor thunk that has been generated by the compiler.
701  The code is placed here, since at this point other tasks might have been released
702  hence overlapping the destructor invocations with some other work in the
703  released tasks. The OpenMP spec is not specific on when the destructors are
704  invoked, so we should be free to choose.
705  */
706  if (taskdata->td_flags.destructors_thunk) {
707  kmp_routine_entry_t destr_thunk = task->data1.destructors;
708  KMP_ASSERT(destr_thunk);
709  destr_thunk(gtid, task);
710  }
711 #endif // OMP_40_ENABLED
712 
713  // bookkeeping for resuming task:
714  // GEH - note tasking_ser => task_serial
715  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
716  taskdata->td_flags.task_serial);
717  if ( taskdata->td_flags.task_serial )
718  {
719  if (resumed_task == NULL) {
720  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
721  }
722  else {
723  // verify resumed task passed in points to parent
724  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
725  }
726  }
727  else {
728  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
729  }
730 
731  // Free this task and then ancestor tasks if they have no children.
732  // Restore th_current_task first as suggested by John:
733  // johnmc: if an asynchronous inquiry peers into the runtime system
734  // it doesn't see the freed task as the current task.
735  thread->th.th_current_task = resumed_task;
736  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
737 
738  // TODO: GEH - make sure root team implicit task is initialized properly.
739  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
740  resumed_task->td_flags.executing = 1; // resume previous task
741 
742  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
743  gtid, taskdata, resumed_task) );
744 
745  return;
746 }
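/*
 * Editorial note (not part of the upstream sources): for untied tasks, td_untied_count
 * acts as a small reference count on the task descriptor.  It is incremented whenever
 * the task is scheduled or started (__kmp_push_task, __kmpc_omp_task_begin_if0) and
 * decremented here; only when it reaches zero is the task treated as finished and made
 * eligible for the child-count bookkeeping and deallocation above.  This keeps the
 * descriptor alive across scheduling points where an untied task may be resumed by a
 * different thread.
 */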
747 
748 //---------------------------------------------------------------------
749 // __kmpc_omp_task_complete_if0: report that a task has completed execution
750 // loc_ref: source location information; points to end of task block.
751 // gtid: global thread number.
752 // task: task thunk for the completed task.
753 
754 void
755 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
756 {
757  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
758  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
759 
760  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
761 
762  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
763  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
764 
765  return;
766 }
767 
768 #ifdef TASK_UNUSED
769 //---------------------------------------------------------------------
770 // __kmpc_omp_task_complete: report that a task has completed execution
771 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
772 
773 void
774 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
775 {
776  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
777  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
778 
779  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
780 
781  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
782  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
783  return;
784 }
785 #endif // TASK_UNUSED
786 
787 
788 #if OMPT_SUPPORT
789 //----------------------------------------------------------------------------------------------------
790 // __kmp_task_init_ompt:
791 // Initialize OMPT fields maintained by a task. This will only be called after
792 // ompt_tool, so we already know whether ompt is enabled or not.
793 
794 static inline void
795 __kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
796 {
797  if (ompt_enabled) {
798  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
799  task->ompt_task_info.function = function;
800  task->ompt_task_info.frame.exit_runtime_frame = NULL;
801  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
802 #if OMP_40_ENABLED
803  task->ompt_task_info.ndeps = 0;
804  task->ompt_task_info.deps = NULL;
805 #endif /* OMP_40_ENABLED */
806  }
807 }
808 #endif
809 
810 
811 //----------------------------------------------------------------------------------------------------
812 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
813 //
814 // loc_ref: reference to source location of parallel region
815 // this_thr: thread data structure corresponding to implicit task
816 // team: team for this_thr
817 // tid: thread id of given thread within team
818 // set_curr_task: TRUE if need to push current task to thread
819 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
820 // TODO: Get better loc_ref. Value passed in may be NULL
821 
822 void
823 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
824 {
825  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
826 
827  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
828  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
829 
830  task->td_task_id = KMP_GEN_TASK_ID();
831  task->td_team = team;
832 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
833  task->td_ident = loc_ref;
834  task->td_taskwait_ident = NULL;
835  task->td_taskwait_counter = 0;
836  task->td_taskwait_thread = 0;
837 
838  task->td_flags.tiedness = TASK_TIED;
839  task->td_flags.tasktype = TASK_IMPLICIT;
840 #if OMP_45_ENABLED
841  task->td_flags.proxy = TASK_FULL;
842 #endif
843 
844  // All implicit tasks are executed immediately, not deferred
845  task->td_flags.task_serial = 1;
846  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
847  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
848 
849  task->td_flags.started = 1;
850  task->td_flags.executing = 1;
851  task->td_flags.complete = 0;
852  task->td_flags.freed = 0;
853 
854 #if OMP_40_ENABLED
855  task->td_depnode = NULL;
856 #endif
857 
858  if (set_curr_task) { // only do this initialization the first time a thread is created
859  task->td_incomplete_child_tasks = 0;
860  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
861 #if OMP_40_ENABLED
862  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
863  task->td_dephash = NULL;
864 #endif
865  __kmp_push_current_task_to_thread( this_thr, team, tid );
866  } else {
867  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
868  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
869  }
870 
871 #if OMPT_SUPPORT
872  __kmp_task_init_ompt(task, tid, NULL);
873 #endif
874 
875  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
876  tid, team, task ) );
877 }
878 
879 
880 //-----------------------------------------------------------------------------
// __kmp_finish_implicit_task: Release per-task resources of the current implicit task
// at the end of a parallel region (the entries of its dependence hash are freed while
// the task itself is kept for reuse).
//
// thread: thread data structure corresponding to the implicit task
887 void
888 __kmp_finish_implicit_task(kmp_info_t *thread)
889 {
890  kmp_taskdata_t *task = thread->th.th_current_task;
891  if (task->td_dephash)
892  __kmp_dephash_free_entries(thread, task->td_dephash);
893 }
894 
895 
896 //-----------------------------------------------------------------------------
// __kmp_free_implicit_task: Free per-task resources of the current implicit task that
// are no longer needed (its dependence hash is freed and the pointer cleared).
//
// thread: thread data structure corresponding to the implicit task
902 void
903 __kmp_free_implicit_task(kmp_info_t *thread)
904 {
905  kmp_taskdata_t *task = thread->th.th_current_task;
906  if (task->td_dephash)
907  __kmp_dephash_free(thread, task->td_dephash);
908  task->td_dephash = NULL;
909 }
910 
911 
912 // Round up a size to a multiple of val, where val is assumed to be a power of two
913 // Used to insert padding between structures co-allocated using a single malloc() call
914 static size_t
915 __kmp_round_up_to_val( size_t size, size_t val ) {
916  if ( size & ( val - 1 ) ) {
917  size &= ~ ( val - 1 );
918  if ( size <= KMP_SIZE_T_MAX - val ) {
919  size += val; // Round up if there is no overflow.
920  }; // if
921  }; // if
922  return size;
923 } // __kmp_round_up_to_val
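// Editorial example (not part of the upstream sources): with val = sizeof(void *),
// e.g. 8 on a 64-bit target, a size of 136 is already a multiple of 8 and is returned
// unchanged, while a size of 141 becomes (141 & ~7) + 8 = 136 + 8 = 144.  A minimal
// standalone check of the same arithmetic (names are illustrative only):
//
//     #include <assert.h>
//     #include <stddef.h>
//     static size_t round_up_to_val(size_t size, size_t val) {
//         if (size & (val - 1)) {
//             size &= ~(val - 1);
//             if (size <= (size_t)-1 - val)
//                 size += val;          // round up when there is no overflow
//         }
//         return size;
//     }
//     int main(void) {
//         assert(round_up_to_val(136, 8) == 136);
//         assert(round_up_to_val(141, 8) == 144);
//         return 0;
//     }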
924 
925 
926 //---------------------------------------------------------------------------------
927 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
928 //
929 // loc_ref: source location information
930 // gtid: global thread number.
931 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
932 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
933 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
934 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
935 // task_entry: Pointer to task code entry point generated by compiler.
936 // returns: a pointer to the allocated kmp_task_t structure (task).
937 
938 kmp_task_t *
939 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
940  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
941  kmp_routine_entry_t task_entry )
942 {
943  kmp_task_t *task;
944  kmp_taskdata_t *taskdata;
945  kmp_info_t *thread = __kmp_threads[ gtid ];
946  kmp_team_t *team = thread->th.th_team;
947  kmp_taskdata_t *parent_task = thread->th.th_current_task;
948  size_t shareds_offset;
949 
950  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
951  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
952  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
953  sizeof_shareds, task_entry) );
954 
955  if ( parent_task->td_flags.final ) {
956  if (flags->merged_if0) {
957  }
958  flags->final = 1;
959  }
960 
961 #if OMP_45_ENABLED
962  if ( flags->proxy == TASK_PROXY ) {
963  flags->tiedness = TASK_UNTIED;
964  flags->merged_if0 = 1;
965 
966  /* are we running in a serialized parallel region or in tskm_immediate_exec mode? Either way, we need tasking support enabled */
967  if ( (thread->th.th_task_team) == NULL ) {
968  /* This should only happen if the team is serialized
969  setup a task team and propagate it to the thread
970  */
971  KMP_DEBUG_ASSERT(team->t.t_serialized);
972  KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
973  __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
974  thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
975  }
976  kmp_task_team_t * task_team = thread->th.th_task_team;
977 
978  /* tasking must be enabled now as the task might not be pushed */
979  if ( !KMP_TASKING_ENABLED( task_team ) ) {
980  KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
981  __kmp_enable_tasking( task_team, thread );
982  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
983  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
984  // No lock needed since only owner can allocate
985  if (thread_data -> td.td_deque == NULL ) {
986  __kmp_alloc_task_deque( thread, thread_data );
987  }
988  }
989 
990  if ( task_team->tt.tt_found_proxy_tasks == FALSE )
991  TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
992  }
993 #endif
994 
995  // Calculate shared structure offset including padding after kmp_task_t struct
996  // to align pointers in shared struct
997  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
998  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
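 // Editorial illustration (not part of the upstream sources) of the single-allocation
 // layout produced below; the task descriptor and its shareds live in one block:
 //
 //     +----------------+----------------------+---------+---------------------+
 //     | kmp_taskdata_t | kmp_task_t + private | padding | pointers to shareds |
 //     +----------------+----------------------+---------+---------------------+
 //     ^ taskdata        ^ task                           ^ task->shareds
 //     |<----------------- shareds_offset --------------->|
 //
 // shareds_offset is rounded up to sizeof(void *) so that the array of pointers to
 // shared variables is itself pointer-aligned.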
999 
1000  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
1001  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
1002  gtid, shareds_offset) );
1003  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
1004  gtid, sizeof_shareds) );
1005 
1006  // Avoid double allocation here by combining shareds with taskdata
1007  #if USE_FAST_MEMORY
1008  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
1009  #else /* ! USE_FAST_MEMORY */
1010  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
1011  #endif /* USE_FAST_MEMORY */
1012 
1013  task = KMP_TASKDATA_TO_TASK(taskdata);
1014 
1015  // Make sure task & taskdata are aligned appropriately
1016 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
1017  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
1018  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
1019 #else
1020  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
1021  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
1022 #endif
1023  if (sizeof_shareds > 0) {
1024  // Avoid double allocation here by combining shareds with taskdata
1025  task->shareds = & ((char *) taskdata)[ shareds_offset ];
1026  // Make sure shareds struct is aligned to pointer size
1027  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
1028  } else {
1029  task->shareds = NULL;
1030  }
1031  task->routine = task_entry;
1032  task->part_id = 0; // AC: Always start with 0 part id
1033 
1034  taskdata->td_task_id = KMP_GEN_TASK_ID();
1035  taskdata->td_team = team;
1036  taskdata->td_alloc_thread = thread;
1037  taskdata->td_parent = parent_task;
1038  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
1039  taskdata->td_untied_count = 0;
1040  taskdata->td_ident = loc_ref;
1041  taskdata->td_taskwait_ident = NULL;
1042  taskdata->td_taskwait_counter = 0;
1043  taskdata->td_taskwait_thread = 0;
1044  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
1045 #if OMP_45_ENABLED
1046  // avoid copying icvs for proxy tasks
1047  if ( flags->proxy == TASK_FULL )
1048 #endif
1049  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
1050 
1051  taskdata->td_flags.tiedness = flags->tiedness;
1052  taskdata->td_flags.final = flags->final;
1053  taskdata->td_flags.merged_if0 = flags->merged_if0;
1054 #if OMP_40_ENABLED
1055  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1056 #endif // OMP_40_ENABLED
1057 #if OMP_45_ENABLED
1058  taskdata->td_flags.proxy = flags->proxy;
1059  taskdata->td_task_team = thread->th.th_task_team;
1060  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1061 #endif
1062  taskdata->td_flags.tasktype = TASK_EXPLICIT;
1063 
1064  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1065  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
1066 
1067  // GEH - TODO: fix this to copy parent task's value of team_serial flag
1068  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
1069 
1070  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
1071  // tasks are not left until program termination to execute. Also, it helps locality to execute
1072  // immediately.
1073  taskdata->td_flags.task_serial = ( parent_task->td_flags.final
1074  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
1075 
1076  taskdata->td_flags.started = 0;
1077  taskdata->td_flags.executing = 0;
1078  taskdata->td_flags.complete = 0;
1079  taskdata->td_flags.freed = 0;
1080 
1081  taskdata->td_flags.native = flags->native;
1082 
1083  taskdata->td_incomplete_child_tasks = 0;
1084  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1085 #if OMP_40_ENABLED
1086  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1087  taskdata->td_dephash = NULL;
1088  taskdata->td_depnode = NULL;
1089 #endif
1090 
1091  // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1092 #if OMP_45_ENABLED
1093  if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1094 #else
1095  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1096 #endif
1097  {
1098  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1099 #if OMP_40_ENABLED
1100  if ( parent_task->td_taskgroup )
1101  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1102 #endif
1103  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1104  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1105  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1106  }
1107  }
1108 
1109  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1110  gtid, taskdata, taskdata->td_parent) );
1111 
1112 #if OMPT_SUPPORT
1113  __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
1114 #endif
1115 
1116  return task;
1117 }
1118 
1119 
1120 kmp_task_t *
1121 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1122  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1123  kmp_routine_entry_t task_entry )
1124 {
1125  kmp_task_t *retval;
1126  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1127 
1128  input_flags->native = FALSE;
1129  // __kmp_task_alloc() sets up all other runtime flags
1130 
1131 #if OMP_45_ENABLED
1132  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1133  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1134  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1135  input_flags->proxy ? "proxy" : "",
1136  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1137 #else
1138  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1139  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1140  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1141  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1142 #endif
1143 
1144  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1145  sizeof_shareds, task_entry );
1146 
1147  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1148 
1149  return retval;
1150 }
1151 
1152 //-----------------------------------------------------------
1153 // __kmp_invoke_task: invoke the specified task
1154 //
1155 // gtid: global thread ID of caller
1156 // task: the task to invoke
1157 // current_task: the task to resume after task invocation
1158 
1159 static void
1160 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1161 {
1162  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1163  kmp_uint64 cur_time;
1164 #if OMP_40_ENABLED
1165  int discard = 0 /* false */;
1166 #endif
1167  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1168  gtid, taskdata, current_task) );
1169  KMP_DEBUG_ASSERT(task);
1170 #if OMP_45_ENABLED
1171  if ( taskdata->td_flags.proxy == TASK_PROXY &&
1172  taskdata->td_flags.complete == 1)
1173  {
1174  // This is a proxy task that was already completed but it needs to run
1175  // its bottom-half finish
1176  KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1177  gtid, taskdata) );
1178 
1179  __kmp_bottom_half_finish_proxy(gtid,task);
1180 
1181  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1182 
1183  return;
1184  }
1185 #endif
1186 
1187 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1188  if(__kmp_forkjoin_frames_mode == 3) {
1189  // Get the current time stamp to measure task execution time to correct barrier imbalance time
1190  cur_time = __itt_get_timestamp();
1191  }
1192 #endif
1193 
1194 #if OMP_45_ENABLED
1195  // Proxy tasks are not handled by the runtime
1196  if ( taskdata->td_flags.proxy != TASK_PROXY )
1197 #endif
1198  __kmp_task_start( gtid, task, current_task );
1199 
1200 #if OMPT_SUPPORT
1201  ompt_thread_info_t oldInfo;
1202  kmp_info_t * thread;
1203  if (ompt_enabled) {
1204  // Store the threads states and restore them after the task
1205  thread = __kmp_threads[ gtid ];
1206  oldInfo = thread->th.ompt_thread_info;
1207  thread->th.ompt_thread_info.wait_id = 0;
1208  thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1209  taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1210  }
1211 #endif
1212 
1213 #if OMP_40_ENABLED
1214  // TODO: cancel tasks if the parallel region has also been cancelled
1215  // TODO: check if this sequence can be hoisted above __kmp_task_start
1216  // if cancellation has been enabled for this run ...
1217  if (__kmp_omp_cancellation) {
1218  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1219  kmp_team_t * this_team = this_thr->th.th_team;
1220  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1221  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1222  KMP_COUNT_BLOCK(TASK_cancelled);
1223  // this task belongs to a task group and we need to cancel it
1224  discard = 1 /* true */;
1225  }
1226  }
1227 
1228  //
1229  // Invoke the task routine and pass in relevant data.
1230  // Thunks generated by gcc take a different argument list.
1231  //
1232  if (!discard) {
1233 #if KMP_STATS_ENABLED
1234  KMP_COUNT_BLOCK(TASK_executed);
1235  switch(KMP_GET_THREAD_STATE()) {
1236  case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
1237  case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
1238  case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
1239  case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
1240  case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
1241  default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
1242  }
1243 #endif // KMP_STATS_ENABLED
1244 #endif // OMP_40_ENABLED
1245 
1246 #if OMPT_SUPPORT && OMPT_TRACE
1247  /* let OMPT know that we're about to run this task */
1248  if (ompt_enabled &&
1249  ompt_callbacks.ompt_callback(ompt_event_task_switch))
1250  {
1251  ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1252  current_task->ompt_task_info.task_id,
1253  taskdata->ompt_task_info.task_id);
1254  }
1255 #endif
1256 
1257 #ifdef KMP_GOMP_COMPAT
1258  if (taskdata->td_flags.native) {
1259  ((void (*)(void *))(*(task->routine)))(task->shareds);
1260  }
1261  else
1262 #endif /* KMP_GOMP_COMPAT */
1263  {
1264  (*(task->routine))(gtid, task);
1265  }
1266  KMP_POP_PARTITIONED_TIMER();
1267 
1268 #if OMPT_SUPPORT && OMPT_TRACE
1269  /* let OMPT know that we're switching back to the encountering task */
1270  if (ompt_enabled &&
1271  ompt_callbacks.ompt_callback(ompt_event_task_switch))
1272  {
1273  ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1274  taskdata->ompt_task_info.task_id,
1275  current_task->ompt_task_info.task_id);
1276  }
1277 #endif
1278 
1279 #if OMP_40_ENABLED
1280  }
1281 #endif // OMP_40_ENABLED
1282 
1283 
1284 #if OMPT_SUPPORT
1285  if (ompt_enabled) {
1286  thread->th.ompt_thread_info = oldInfo;
1287  taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1288  }
1289 #endif
1290 
1291 #if OMP_45_ENABLED
1292  // Proxy tasks are not handled by the runtime
1293  if ( taskdata->td_flags.proxy != TASK_PROXY )
1294 #endif
1295  __kmp_task_finish( gtid, task, current_task );
1296 
1297 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1298  // Barrier imbalance - correct arrive time after the task finished
1299  if(__kmp_forkjoin_frames_mode == 3) {
1300  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1301  if(this_thr->th.th_bar_arrive_time) {
1302  this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1303  }
1304  }
1305 #endif
1306  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1307  gtid, taskdata, current_task) );
1308  return;
1309 }
1310 
1311 //-----------------------------------------------------------------------
1312 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1313 //
1314 // loc_ref: location of original task pragma (ignored)
1315 // gtid: Global Thread ID of encountering thread
1316 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1317 // Returns:
1318 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1319 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1320 
1321 kmp_int32
1322 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1323 {
1324  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1325 
1326  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1327  gtid, loc_ref, new_taskdata ) );
1328 
1329  /* Should we execute the new task or queue it? For now, let's just always try to
1330  queue it. If the queue fills up, then we'll execute it. */
1331 
1332  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1333  { // Execute this task immediately
1334  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1335  new_taskdata->td_flags.task_serial = 1;
1336  __kmp_invoke_task( gtid, new_task, current_task );
1337  }
1338 
1339  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1340  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1341  new_taskdata ) );
1342 
1343  return TASK_CURRENT_NOT_QUEUED;
1344 }
1345 
1346 //---------------------------------------------------------------------
1347 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1348 // gtid: Global Thread ID of encountering thread
1349 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1350 // serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1351 // returns:
1352 //
1353 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1354 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1355 kmp_int32
1356 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1357 {
1358  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1359 
1360 #if OMPT_SUPPORT
1361  if (ompt_enabled) {
1362  new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1363  __builtin_frame_address(0);
1364  }
1365 #endif
1366 
1367  /* Should we execute the new task or queue it? For now, let's just always try to
1368  queue it. If the queue fills up, then we'll execute it. */
1369 #if OMP_45_ENABLED
1370  if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1371 #else
1372  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1373 #endif
1374  { // Execute this task immediately
1375  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1376  if ( serialize_immediate )
1377  new_taskdata -> td_flags.task_serial = 1;
1378  __kmp_invoke_task( gtid, new_task, current_task );
1379  }
1380 
1381 #if OMPT_SUPPORT
1382  if (ompt_enabled) {
1383  new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1384  }
1385 #endif
1386 
1387  return TASK_CURRENT_NOT_QUEUED;
1388 }
1389 
1390 //---------------------------------------------------------------------
1391 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1392 // the parent thread only!
1393 // loc_ref: location of original task pragma (ignored)
1394 // gtid: Global Thread ID of encountering thread
1395 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1396 // returns:
1397 //
1398 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1399 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1400 
1401 kmp_int32
1402 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1403 {
1404  kmp_int32 res;
1405  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1406 
1407 #if KMP_DEBUG
1408  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1409 #endif
1410  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1411  gtid, loc_ref, new_taskdata ) );
1412 
1413  res = __kmp_omp_task(gtid,new_task,true);
1414 
1415  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1416  gtid, loc_ref, new_taskdata ) );
1417  return res;
1418 }
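
// A compiled-out sketch of one plausible compiler-generated call sequence for
// "#pragma omp task". The outlined routine example_task_entry, the tied-task
// flag value, and the exact __kmpc_omp_task_alloc() argument list are
// illustrative assumptions rather than code taken from this file.
#if 0
static kmp_int32 example_task_entry( kmp_int32 gtid, void *part )
{
    return 0;   // the outlined task body would go here
}

static void example_emit_task( ident_t *loc, kmp_int32 gtid )
{
    // Allocate the task thunk, then hand it to the runtime; __kmpc_omp_task()
    // queues it on this thread's deque or, if the deque is full, runs it now.
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* assumed: tied */,
                                           sizeof(kmp_task_t), 0 /* no shareds */,
                                           &example_task_entry );
    __kmpc_omp_task( loc, gtid, t );
}
#endif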
1419 
1420 //-------------------------------------------------------------------------------------
1421 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1422 
1423 kmp_int32
1424 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1425 {
1426  kmp_taskdata_t * taskdata;
1427  kmp_info_t * thread;
1428  int thread_finished = FALSE;
1429  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1430 
1431  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
1432 
1433  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1434  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1435 
1436  thread = __kmp_threads[ gtid ];
1437  taskdata = thread -> th.th_current_task;
1438 
1439 #if OMPT_SUPPORT && OMPT_TRACE
1440  ompt_task_id_t my_task_id;
1441  ompt_parallel_id_t my_parallel_id;
1442 
1443  if (ompt_enabled) {
1444  kmp_team_t *team = thread->th.th_team;
1445  my_task_id = taskdata->ompt_task_info.task_id;
1446  my_parallel_id = team->t.ompt_team_info.parallel_id;
1447 
1448  taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
1449  if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1450  ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1451  my_parallel_id, my_task_id);
1452  }
1453  }
1454 #endif
1455 
1456  // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
1457 #if USE_ITT_BUILD
1458  // Note: These values are used by ITT events as well.
1459 #endif /* USE_ITT_BUILD */
1460  taskdata->td_taskwait_counter += 1;
1461  taskdata->td_taskwait_ident = loc_ref;
1462  taskdata->td_taskwait_thread = gtid + 1;
1463 
1464 #if USE_ITT_BUILD
1465  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1466  if ( itt_sync_obj != NULL )
1467  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1468 #endif /* USE_ITT_BUILD */
1469 
1470 #if OMP_45_ENABLED
1471  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1472 #else
1473  if ( ! taskdata->td_flags.team_serial )
1474 #endif
1475  {
1476  // GEH: if team serialized, avoid reading the volatile variable below.
1477  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1478  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1479  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1480  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1481  }
1482  }
1483 #if USE_ITT_BUILD
1484  if ( itt_sync_obj != NULL )
1485  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1486 #endif /* USE_ITT_BUILD */
1487 
1488  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1489  // Debugger: The taskwait is completed. The location remains, but the thread gtid is negated.
1490  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1491 
1492 #if OMPT_SUPPORT && OMPT_TRACE
1493  if (ompt_enabled) {
1494  if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1495  ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
1496  my_parallel_id, my_task_id);
1497  }
1498  taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1499  }
1500 #endif
1501  }
1502 
1503  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1504  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1505 
1506  return TASK_CURRENT_NOT_QUEUED;
1507 }
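
// A compiled-out sketch of the pairing a compiler might emit for a task region
// followed by "#pragma omp taskwait"; example_emit_task() is the hypothetical
// helper sketched after __kmpc_omp_task above.
#if 0
static void example_taskwait_usage( ident_t *loc, kmp_int32 gtid )
{
    example_emit_task( loc, gtid );   // spawn one child task
    example_emit_task( loc, gtid );   // spawn another child task
    // Returns once td_incomplete_child_tasks of the current task reaches zero,
    // executing queued tasks (own deque first, then stealing) while it waits.
    __kmpc_omp_taskwait( loc, gtid );
}
#endif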
1508 
1509 
1510 //-------------------------------------------------
1511 // __kmpc_omp_taskyield: switch to a different task
1512 
1513 kmp_int32
1514 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1515 {
1516  kmp_taskdata_t * taskdata;
1517  kmp_info_t * thread;
1518  int thread_finished = FALSE;
1519 
1520  KMP_COUNT_BLOCK(OMP_TASKYIELD);
1521  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
1522 
1523  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1524  gtid, loc_ref, end_part) );
1525 
1526  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1527  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1528 
1529  thread = __kmp_threads[ gtid ];
1530  taskdata = thread -> th.th_current_task;
1531  // Should we model this as a task wait or not?
1532  // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
1533 #if USE_ITT_BUILD
1534  // Note: These values are used by ITT events as well.
1535 #endif /* USE_ITT_BUILD */
1536  taskdata->td_taskwait_counter += 1;
1537  taskdata->td_taskwait_ident = loc_ref;
1538  taskdata->td_taskwait_thread = gtid + 1;
1539 
1540 #if USE_ITT_BUILD
1541  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1542  if ( itt_sync_obj != NULL )
1543  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1544 #endif /* USE_ITT_BUILD */
1545  if ( ! taskdata->td_flags.team_serial ) {
1546  kmp_task_team_t * task_team = thread->th.th_task_team;
1547  if (task_team != NULL) {
1548  if (KMP_TASKING_ENABLED(task_team)) {
1549  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1550  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1551  }
1552  }
1553  }
1554 #if USE_ITT_BUILD
1555  if ( itt_sync_obj != NULL )
1556  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1557 #endif /* USE_ITT_BUILD */
1558 
1559  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1560  // Debugger: The taskwait is completed. The location remains, but the thread gtid is negated.
1561  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1562  }
1563 
1564  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1565  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1566 
1567  return TASK_CURRENT_NOT_QUEUED;
1568 }
1569 
1570 
1571 #if OMP_40_ENABLED
1572 //-------------------------------------------------------------------------------------
1573 // __kmpc_taskgroup: Start a new taskgroup
1574 
1575 void
1576 __kmpc_taskgroup( ident_t* loc, int gtid )
1577 {
1578  kmp_info_t * thread = __kmp_threads[ gtid ];
1579  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1580  kmp_taskgroup_t * tg_new =
1581  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1582  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1583  tg_new->count = 0;
1584  tg_new->cancel_request = cancel_noreq;
1585  tg_new->parent = taskdata->td_taskgroup;
1586  taskdata->td_taskgroup = tg_new;
1587 }
1588 
1589 
1590 //-------------------------------------------------------------------------------------
1591 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1592 // and its descendants are complete
1593 
1594 void
1595 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1596 {
1597  kmp_info_t * thread = __kmp_threads[ gtid ];
1598  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1599  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1600  int thread_finished = FALSE;
1601 
1602  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1603  KMP_DEBUG_ASSERT( taskgroup != NULL );
1604  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
1605 
1606  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1607 #if USE_ITT_BUILD
1608  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1609  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1610  if ( itt_sync_obj != NULL )
1611  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1612 #endif /* USE_ITT_BUILD */
1613 
1614 #if OMP_45_ENABLED
1615  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1616 #else
1617  if ( ! taskdata->td_flags.team_serial )
1618 #endif
1619  {
1620  kmp_flag_32 flag(&(taskgroup->count), 0U);
1621  while ( TCR_4(taskgroup->count) != 0 ) {
1622  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1623  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1624  }
1625  }
1626 
1627 #if USE_ITT_BUILD
1628  if ( itt_sync_obj != NULL )
1629  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1630 #endif /* USE_ITT_BUILD */
1631  }
1632  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1633 
1634  // Restore parent taskgroup for the current task
1635  taskdata->td_taskgroup = taskgroup->parent;
1636  __kmp_thread_free( thread, taskgroup );
1637 
1638  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1639 }
1640 #endif
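
// A compiled-out sketch of how a compiler might bracket a "#pragma omp taskgroup"
// region with the two entry points above; example_emit_task() is the hypothetical
// helper sketched after __kmpc_omp_task.
#if 0
static void example_taskgroup_usage( ident_t *loc, kmp_int32 gtid )
{
    __kmpc_taskgroup( loc, gtid );      // push a new kmp_taskgroup_t onto the current task
    example_emit_task( loc, gtid );     // tasks created here are counted in taskgroup->count
    __kmpc_end_taskgroup( loc, gtid );  // wait for count to reach 0, then pop the taskgroup
}
#endif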
1641 
1642 
1643 //------------------------------------------------------
1644 // __kmp_remove_my_task: remove a task from my own deque
1645 
1646 static kmp_task_t *
1647 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1648  kmp_int32 is_constrained )
1649 {
1650  kmp_task_t * task;
1651  kmp_taskdata_t * taskdata;
1652  kmp_thread_data_t *thread_data;
1653  kmp_uint32 tail;
1654 
1655  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1656  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1657 
1658  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1659 
1660  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1661  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1662  thread_data->td.td_deque_tail) );
1663 
1664  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1665  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1666  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1667  thread_data->td.td_deque_tail) );
1668  return NULL;
1669  }
1670 
1671  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1672 
1673  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1674  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1675  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1676  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1677  thread_data->td.td_deque_tail) );
1678  return NULL;
1679  }
1680 
1681  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
1682  taskdata = thread_data -> td.td_deque[ tail ];
1683 
1684  if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
1685  // we need to check if the candidate obeys task scheduling constraint:
1686  // only child of current task can be scheduled
1687  kmp_taskdata_t * current = thread->th.th_current_task;
1688  kmp_int32 level = current->td_level;
1689  kmp_taskdata_t * parent = taskdata->td_parent;
1690  while ( parent != current && parent->td_level > level ) {
1691  parent = parent->td_parent; // check generation up to the level of the current task
1692  KMP_DEBUG_ASSERT(parent != NULL);
1693  }
1694  if ( parent != current ) {
1695  // If the tail task is not a child, then no other child can appear in the deque.
1696  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1697  KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1698  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1699  thread_data->td.td_deque_tail) );
1700  return NULL;
1701  }
1702  }
1703 
1704  thread_data -> td.td_deque_tail = tail;
1705  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1706 
1707  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1708 
1709  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1710  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1711  thread_data->td.td_deque_tail) );
1712 
1713  task = KMP_TASKDATA_TO_TASK( taskdata );
1714  return task;
1715 }
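
// A compiled-out sketch of the circular-buffer index arithmetic used above,
// assuming TASK_DEQUE_MASK is (deque size - 1) for a power-of-two size, so the
// bitwise AND performs the wrap a modulo would: e.g. with size 256, a tail of 0
// decremented by 1 wraps to 255.
#if 0
static kmp_uint32 example_deque_prev_index( kmp_uint32 tail, kmp_uint32 deque_size )
{
    kmp_uint32 mask = deque_size - 1;   // valid only when deque_size is a power of two
    return ( tail - 1 ) & mask;         // same wrap as the "tail" computation above
}
#endif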
1716 
1717 
1718 //-----------------------------------------------------------
1719 // __kmp_steal_task: remove a task from another thread's deque
1720 // Assume that calling thread has already checked existence of
1721 // task_team thread_data before calling this routine.
1722 
1723 static kmp_task_t *
1724 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1725  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1726  kmp_int32 is_constrained )
1727 {
1728  kmp_task_t * task;
1729  kmp_taskdata_t * taskdata;
1730  kmp_thread_data_t *victim_td, *threads_data;
1731  kmp_int32 victim_tid;
1732 
1733  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1734 
1735  threads_data = task_team -> tt.tt_threads_data;
1736  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1737 
1738  victim_tid = victim->th.th_info.ds.ds_tid;
1739  victim_td = & threads_data[ victim_tid ];
1740 
1741  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1742  "head=%u tail=%u\n",
1743  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1744  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1745 
1746  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1747  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1748  {
1749  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1750  "ntasks=%d head=%u tail=%u\n",
1751  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1752  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1753  return NULL;
1754  }
1755 
1756  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1757 
1758  // Check again after we acquire the lock
1759  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1760  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1761  {
1762  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1763  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1764  "ntasks=%d head=%u tail=%u\n",
1765  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1766  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1767  return NULL;
1768  }
1769 
1770  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1771 
1772  if ( !is_constrained ) {
1773  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1774  KMP_ASSERT(taskdata);
1775  // Bump head pointer and Wrap.
1776  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK(victim_td->td);
1777  } else {
1778  // Under the task scheduling constraint, steal from the tail of the deque (the more recently pushed, smaller tasks)
1779  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(victim_td->td); // Wrap index.
1780  taskdata = victim_td -> td.td_deque[ tail ];
1781  KMP_ASSERT(taskdata);
1782  // we need to check if the candidate obeys task scheduling constraint:
1783  // only child of current task can be scheduled
1784  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1785  kmp_int32 level = current->td_level;
1786  kmp_taskdata_t * parent = taskdata->td_parent;
1787  while ( parent != current && parent->td_level > level ) {
1788  parent = parent->td_parent; // check generation up to the level of the current task
1789  KMP_DEBUG_ASSERT(parent != NULL);
1790  }
1791  if ( parent != current ) {
1792  // If the tail task is not a descendant of the current task then do not steal it.
1793  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1794  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
1795  "ntasks=%d head=%u tail=%u\n",
1796  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1797  task_team, victim_td->td.td_deque_ntasks,
1798  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1799  return NULL;
1800  }
1801  victim_td -> td.td_deque_tail = tail;
1802  }
1803  if (*thread_finished) {
1804  // We need to un-mark this victim as a finished victim. This must be done before
1805  // releasing the lock, or else other threads (starting with the master victim)
1806  // might be prematurely released from the barrier!!!
1807  kmp_uint32 count;
1808 
1809  count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1810 
1811  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1812  gtid, count + 1, task_team) );
1813 
1814  *thread_finished = FALSE;
1815  }
1816  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1817 
1818  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1819 
1820  KMP_COUNT_BLOCK(TASK_stolen);
1821  KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
1822  "ntasks=%d head=%u tail=%u\n",
1823  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1824  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1825  victim_td->td.td_deque_tail) );
1826 
1827  task = KMP_TASKDATA_TO_TASK( taskdata );
1828  return task;
1829 }
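
// A compiled-out sketch of the task scheduling constraint test that appears in
// both __kmp_remove_my_task and __kmp_steal_task: a tied candidate may be taken
// only if the current task is found on its ancestor chain at or above the
// current task's nesting level. example_obeys_tsc() is a hypothetical helper,
// not a routine of this library.
#if 0
static int example_obeys_tsc( kmp_taskdata_t *candidate, kmp_taskdata_t *current )
{
    kmp_int32 level = current->td_level;
    kmp_taskdata_t *parent = candidate->td_parent;
    while ( parent != current && parent->td_level > level ) {
        parent = parent->td_parent;     // walk up one generation
        KMP_DEBUG_ASSERT( parent != NULL );
    }
    return parent == current;           // TRUE only for descendants of the current task
}
#endif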
1830 
1831 
1832 //-----------------------------------------------------------------------------
1833 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1834 // is satisfied (return true) or there are none left (return false).
1835 // final_spin is TRUE if this is the spin at the release barrier.
1836 // thread_finished indicates whether the thread is finished executing all
1837 // the tasks it has on its deque, and is at the release barrier.
1838 // spinner is the location on which to spin.
1839 // spinner == NULL means only execute a single task and return.
1840 // checker is the value to check to terminate the spin.
1841 template <class C>
1842 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1843  int *thread_finished
1844  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1845 {
1846  kmp_task_team_t * task_team = thread->th.th_task_team;
1847  kmp_thread_data_t * threads_data;
1848  kmp_task_t * task;
1849  kmp_info_t * other_thread;
1850  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1851  volatile kmp_uint32 * unfinished_threads;
1852  kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
1853 
1854  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1855  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1856 
1857  if (task_team == NULL) return FALSE;
1858 
1859  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1860  gtid, final_spin, *thread_finished) );
1861 
1862  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1863  KMP_DEBUG_ASSERT( threads_data != NULL );
1864 
1865  nthreads = task_team -> tt.tt_nproc;
1866  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1867 #if OMP_45_ENABLED
1868  KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1869 #else
1870  KMP_DEBUG_ASSERT( nthreads > 1 );
1871 #endif
1872  KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
1873 
1874  while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
1875  while (1) { // Inner loop to find a task and execute it
1876  task = NULL;
1877  if (use_own_tasks) { // check on own queue first
1878  task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
1879  }
1880  if ((task == NULL) && (nthreads > 1)) { // Steal a task
1881  int asleep = 1;
1882  use_own_tasks = 0;
1883  // Try to steal from the last place I stole from successfully.
1884  if (victim == -2) { // haven't stolen anything yet
1885  victim = threads_data[tid].td.td_deque_last_stolen;
1886  if (victim != -1) // if we have a last stolen from victim, get the thread
1887  other_thread = threads_data[victim].td.td_thr;
1888  }
1889  if (victim != -1) { // found last victim
1890  asleep = 0;
1891  }
1892  else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
1893  do { // Find a different thread to steal work from.
1894  // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
1895  // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
1896  victim = __kmp_get_random(thread) % (nthreads - 1);
1897  if (victim >= tid) {
1898  ++victim; // Adjusts random distribution to exclude self
1899  }
1900  // Found a potential victim
1901  other_thread = threads_data[victim].td.td_thr;
1902  // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
1903  // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
1904  // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
1905  // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
1906  // do not sleep at the separate tasking barrier, so this isn't a problem.
1907  asleep = 0;
1908  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1909  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1910  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
1911  asleep = 1;
1912  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1913  // A sleeping thread should not have any tasks on its queue. There is a slight
1914  // possibility that it resumes, steals a task from another thread, which spawns more
1915  // tasks, all in the time that it takes this thread to check => don't write an assertion
1916  // that the victim's queue is empty. Try stealing from a different thread.
1917  }
1918  } while (asleep);
1919  }
1920 
1921  if (!asleep) {
1922  // We have a victim to try to steal from
1923  task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
1924  }
1925  if (task != NULL) { // set last stolen to victim
1926  if (threads_data[tid].td.td_deque_last_stolen != victim) {
1927  threads_data[tid].td.td_deque_last_stolen = victim;
1928  // The pre-refactored code did not try more than 1 successful new victim,
1929  // unless the last one generated more local tasks; new_victim keeps track of this
1930  new_victim = 1;
1931  }
1932  }
1933  else { // No tasks found; unset last_stolen
1934  KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
1935  victim = -2; // no successful victim found
1936  }
1937  }
1938 
1939  if (task == NULL) // break out of tasking loop
1940  break;
1941 
1942  // Found a task; execute it
1943 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1944  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1945  if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
1946  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1947  }
1948  __kmp_itt_task_starting( itt_sync_obj );
1949  }
1950 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1951  __kmp_invoke_task( gtid, task, current_task );
1952 #if USE_ITT_BUILD
1953  if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
1954 #endif /* USE_ITT_BUILD */
1955  // If this thread is only partway through the barrier and the condition is met, then return now,
1956  // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
1957  // in the barrier, waiting to be released, we know that the termination condition will not be
1958  // satisfied, so don't waste any cycles checking it.
1959  if (flag == NULL || (!final_spin && flag->done_check())) {
1960  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
1961  return TRUE;
1962  }
1963  if (thread->th.th_task_team == NULL) {
1964  break;
1965  }
1966  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1967  // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
1968  if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
1969  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
1970  use_own_tasks = 1;
1971  new_victim = 0;
1972  }
1973  }
1974 
1975  // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
1976 #if OMP_45_ENABLED
1977  // The work queue may be empty but there might be proxy tasks still executing
1978  if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
1979 #else
1980  if (final_spin)
1981 #endif
1982  {
1983  // First, decrement the #unfinished threads, if that has not already been done. This decrement
1984  // might be to the spin location, and result in the termination condition being satisfied.
1985  if (! *thread_finished) {
1986  kmp_uint32 count;
1987 
1988  count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1989  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
1990  gtid, count, task_team) );
1991  *thread_finished = TRUE;
1992  }
1993 
1994  // It is now unsafe to reference thread->th.th_team !!!
1995  // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
1996  // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
1997  // If we can steal more work, we know that this has not happened yet.
1998  if (flag != NULL && flag->done_check()) {
1999  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
2000  return TRUE;
2001  }
2002  }
2003 
2004  // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
2005  if (thread->th.th_task_team == NULL) {
2006  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
2007  return FALSE;
2008  }
2009 
2010 #if OMP_45_ENABLED
2011  // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
2012  // tasks from own queue
2013  if (nthreads == 1)
2014  use_own_tasks = 1;
2015  else
2016 #endif
2017  {
2018  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
2019  return FALSE;
2020  }
2021  }
2022 }
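
// A compiled-out sketch of the victim selection arithmetic used in the stealing
// loop above: drawing from nthreads - 1 slots and bumping any value >= tid
// excludes the calling thread while keeping the draw uniform over the others.
// example_pick_victim() is a hypothetical helper, not part of this library.
#if 0
static kmp_int32 example_pick_victim( kmp_info_t *thread, kmp_int32 tid, kmp_int32 nthreads )
{
    kmp_int32 victim = __kmp_get_random( thread ) % ( nthreads - 1 );
    if ( victim >= tid )
        ++victim;                       // shift past self: result lies in [0, nthreads) excluding tid
    return victim;
}
#endif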
2023 
2024 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2025  int *thread_finished
2026  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2027 {
2028  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2029  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2030 }
2031 
2032 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2033  int *thread_finished
2034  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2035 {
2036  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2037  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2038 }
2039 
2040 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2041  int *thread_finished
2042  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2043 {
2044  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2045  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2046 }
2047 
2048 
2049 
2050 //-----------------------------------------------------------------------------
2051 // __kmp_enable_tasking: Set up the task team's per-thread data and resume threads
2052 // sleeping at the next barrier so they can assist in executing enqueued tasks.
2053 // The first thread in allocates the threads_data array atomically.
2054 
2055 static void
2056 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2057 {
2058  kmp_thread_data_t *threads_data;
2059  int nthreads, i, is_init_thread;
2060 
2061  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2062  __kmp_gtid_from_thread( this_thr ) ) );
2063 
2064  KMP_DEBUG_ASSERT(task_team != NULL);
2065  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
2066 
2067  nthreads = task_team->tt.tt_nproc;
2068  KMP_DEBUG_ASSERT(nthreads > 0);
2069  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
2070 
2071  // Allocate or increase the size of threads_data if necessary
2072  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2073 
2074  if (!is_init_thread) {
2075  // Some other thread already set up the array.
2076  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2077  __kmp_gtid_from_thread( this_thr ) ) );
2078  return;
2079  }
2080  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2081  KMP_DEBUG_ASSERT( threads_data != NULL );
2082 
2083  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2084  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2085  {
2086  // Release any threads sleeping at the barrier, so that they can steal
2087  // tasks and execute them. In extra barrier mode, tasks do not sleep
2088  // at the separate tasking barrier, so this isn't a problem.
2089  for (i = 0; i < nthreads; i++) {
2090  volatile void *sleep_loc;
2091  kmp_info_t *thread = threads_data[i].td.td_thr;
2092 
2093  if (i == this_thr->th.th_info.ds.ds_tid) {
2094  continue;
2095  }
2096  // Since we haven't locked the thread's suspend mutex lock at this
2097  // point, there is a small window where a thread might be putting
2098  // itself to sleep, but hasn't set the th_sleep_loc field yet.
2099  // To work around this, __kmp_execute_tasks_template() periodically checks
2100  // to see if other threads are sleeping (using the same random
2101  // mechanism that is used for task stealing) and awakens them if
2102  // they are.
2103  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
2104  {
2105  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2106  __kmp_gtid_from_thread( this_thr ),
2107  __kmp_gtid_from_thread( thread ) ) );
2108  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2109  }
2110  else {
2111  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2112  __kmp_gtid_from_thread( this_thr ),
2113  __kmp_gtid_from_thread( thread ) ) );
2114  }
2115  }
2116  }
2117 
2118  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2119  __kmp_gtid_from_thread( this_thr ) ) );
2120 }
2121 
2122 
2123 /* ------------------------------------------------------------------------ */
2124 /* // TODO: Check the comment consistency
2125  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2126  * like a shadow of the kmp_team_t data struct, with a different lifetime.
2127  * After a child thread checks into a barrier and calls __kmp_release() from
2128  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2129  * longer assume that the kmp_team_t structure is intact (at any moment, the
2130  * master thread may exit the barrier code and free the team data structure,
2131  * and return the threads to the thread pool).
2132  *
2133  * This does not work with the tasking code, as the thread is still
2134  * expected to participate in the execution of any tasks that may have been
2135  * spawned by a member of the team, and the thread still needs access to
2136  * each of the other threads in the team, so that it can steal work from them.
2137  *
2138  * Enter the existence of the kmp_task_team_t struct. It employs a reference
2139  * counting mechanism, and is allocated by the master thread before calling
2140  * __kmp_<barrier_kind>_release, and then is released by the last thread to
2141  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2142  * of the kmp_task_team_t structs for consecutive barriers can overlap
2143  * (and will, unless the master thread is the last thread to exit the barrier
2144  * release phase, which is not typical).
2145  *
2146  * The existence of such a struct is useful outside the context of tasking,
2147  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2148  * so that any performance differences show up when comparing the 2.5 vs. 3.0
2149  * libraries.
2150  *
2151  * We currently use the existence of the threads array as an indicator that
2152  * tasks were spawned since the last barrier. If the structure is to be
2153  * useful outside the context of tasking, then this will have to change, but
2154  * not setting the field minimizes the performance impact of tasking on
2155  * barriers, when no explicit tasks were spawned (pushed, actually).
2156  */
2157 
2158 
2159 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2160 // Lock for task team data structures
2161 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2162 
2163 
2164 //------------------------------------------------------------------------------
2165 // __kmp_alloc_task_deque:
2166 // Allocates a task deque for a particular thread, and initializes the necessary
2167 // data structures relating to the deque. This only happens once per thread
2168 // per task team since task teams are recycled.
2169 // No lock is needed during allocation since each thread allocates its own
2170 // deque.
2171 
2172 static void
2173 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2174 {
2175  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2176  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2177 
2178  // Initialize last stolen task field to "none"
2179  thread_data -> td.td_deque_last_stolen = -1;
2180 
2181  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2182  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2183  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2184 
2185  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2186  __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
2187  // Allocate space for task deque, and zero the deque
2188  // Cannot use __kmp_thread_calloc() because threads not around for
2189  // kmp_reap_task_team( ).
2190  thread_data -> td.td_deque = (kmp_taskdata_t **)
2191  __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2192  thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
2193 }
2194 
2195 //------------------------------------------------------------------------------
2196 // __kmp_realloc_task_deque:
2197 // Re-allocates a task deque for a particular thread, copies the content from the old deque
2198 // and adjusts the necessary data structures relating to the deque.
2199 // This operation must be done with the deque_lock held
2200 
2201 static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2202 {
2203  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2204  kmp_int32 new_size = 2 * size;
2205 
2206  KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2207  __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2208 
2209  kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2210 
2211  int i,j;
2212  for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2213  new_deque[j] = thread_data->td.td_deque[i];
2214 
2215  __kmp_free(thread_data->td.td_deque);
2216 
2217  thread_data -> td.td_deque_head = 0;
2218  thread_data -> td.td_deque_tail = size;
2219  thread_data -> td.td_deque = new_deque;
2220  thread_data -> td.td_deque_size = new_size;
2221 }
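
// Worked example of the copy above (a sketch, assuming the deque is full, i.e.
// ntasks == size, when this routine is called -- which is what the reallocation
// sites in this file check before calling it): with size 4 and head == tail == 2,
// the loop copies old[2], old[3], old[0], old[1] into new[0..3]; afterwards
// head = 0, tail = 4 (the old size) and the capacity is 8, so the tasks keep
// their relative order but now occupy the front of the larger deque.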
2222 
2223 //------------------------------------------------------------------------------
2224 // __kmp_free_task_deque:
2225 // Deallocates a task deque for a particular thread.
2226 // Happens at library deallocation, so there is no need to reset all thread data fields.
2227 
2228 static void
2229 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2230 {
2231  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2232 
2233  if ( thread_data -> td.td_deque != NULL ) {
2234  TCW_4(thread_data -> td.td_deque_ntasks, 0);
2235  __kmp_free( thread_data -> td.td_deque );
2236  thread_data -> td.td_deque = NULL;
2237  }
2238  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2239 
2240 #ifdef BUILD_TIED_TASK_STACK
2241  // GEH: Figure out what to do here for td_susp_tied_tasks
2242  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2243  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2244  }
2245 #endif // BUILD_TIED_TASK_STACK
2246 }
2247 
2248 
2249 //------------------------------------------------------------------------------
2250 // __kmp_realloc_task_threads_data:
2251 // Allocates a threads_data array for a task team, either by allocating an initial
2252 // array or enlarging an existing array. Only the first thread to get the lock
2253 // allocates or enlarges the array and re-initializes the array elements.
2254 // That thread returns "TRUE", the rest return "FALSE".
2255 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
2256 // The current size is given by task_team -> tt.tt_max_threads.
2257 
2258 static int
2259 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2260 {
2261  kmp_thread_data_t ** threads_data_p;
2262  kmp_int32 nthreads, maxthreads;
2263  int is_init_thread = FALSE;
2264 
2265  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2266  // Already reallocated and initialized.
2267  return FALSE;
2268  }
2269 
2270  threads_data_p = & task_team -> tt.tt_threads_data;
2271  nthreads = task_team -> tt.tt_nproc;
2272  maxthreads = task_team -> tt.tt_max_threads;
2273 
2274  // All threads must lock when they encounter the first task of the implicit task
2275  // region to make sure threads_data fields are (re)initialized before used.
2276  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2277 
2278  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2279  // first thread to enable tasking
2280  kmp_team_t *team = thread -> th.th_team;
2281  int i;
2282 
2283  is_init_thread = TRUE;
2284  if ( maxthreads < nthreads ) {
2285 
2286  if ( *threads_data_p != NULL ) {
2287  kmp_thread_data_t *old_data = *threads_data_p;
2288  kmp_thread_data_t *new_data = NULL;
2289 
2290  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2291  "threads data for task_team %p, new_size = %d, old_size = %d\n",
2292  __kmp_gtid_from_thread( thread ), task_team,
2293  nthreads, maxthreads ) );
2294  // Reallocate threads_data to have more elements than current array
2295  // Cannot use __kmp_thread_realloc() because threads not around for
2296  // kmp_reap_task_team( ). Note all new array entries are initialized
2297  // to zero by __kmp_allocate().
2298  new_data = (kmp_thread_data_t *)
2299  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2300  // copy old data to new data
2301  KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2302  (void *) old_data,
2303  maxthreads * sizeof(kmp_thread_data_t) ); // copy whole old entries, not pointer-sized chunks
2304 
2305 #ifdef BUILD_TIED_TASK_STACK
2306  // GEH: Figure out if this is the right thing to do
2307  for (i = maxthreads; i < nthreads; i++) {
2308  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2309  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2310  }
2311 #endif // BUILD_TIED_TASK_STACK
2312  // Install the new data and free the old data
2313  (*threads_data_p) = new_data;
2314  __kmp_free( old_data );
2315  }
2316  else {
2317  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2318  "threads data for task_team %p, size = %d\n",
2319  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2320  // Make the initial allocate for threads_data array, and zero entries
2321  // Cannot use __kmp_thread_calloc() because threads not around for
2322  // kmp_reap_task_team( ).
2323  *threads_data_p = (kmp_thread_data_t *)
2324  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2325 #ifdef BUILD_TIED_TASK_STACK
2326  // GEH: Figure out if this is the right thing to do
2327  for (i = 0; i < nthreads; i++) {
2328  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2329  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2330  }
2331 #endif // BUILD_TIED_TASK_STACK
2332  }
2333  task_team -> tt.tt_max_threads = nthreads;
2334  }
2335  else {
2336  // If array has (more than) enough elements, go ahead and use it
2337  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2338  }
2339 
2340  // initialize threads_data pointers back to thread_info structures
2341  for (i = 0; i < nthreads; i++) {
2342  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2343  thread_data -> td.td_thr = team -> t.t_threads[i];
2344 
2345  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2346  // The last stolen field survives across teams / barrier, and the number
2347  // of threads may have changed. It's possible (likely?) that a new
2348  // parallel region will exhibit the same behavior as the previous region.
2349  thread_data -> td.td_deque_last_stolen = -1;
2350  }
2351  }
2352 
2353  KMP_MB();
2354  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2355  }
2356 
2357  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2358  return is_init_thread;
2359 }
2360 
2361 
2362 //------------------------------------------------------------------------------
2363 // __kmp_free_task_threads_data:
2364 // Deallocates a threads_data array for a task team, including any attached
2365 // tasking deques. Only occurs at library shutdown.
2366 
2367 static void
2368 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2369 {
2370  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2371  if ( task_team -> tt.tt_threads_data != NULL ) {
2372  int i;
2373  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2374  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2375  }
2376  __kmp_free( task_team -> tt.tt_threads_data );
2377  task_team -> tt.tt_threads_data = NULL;
2378  }
2379  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2380 }
2381 
2382 
2383 //------------------------------------------------------------------------------
2384 // __kmp_allocate_task_team:
2385 // Allocates a task team associated with a specific team, taking it from
2386 // the global task team free list if possible. Also initializes data structures.
2387 
2388 static kmp_task_team_t *
2389 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2390 {
2391  kmp_task_team_t *task_team = NULL;
2392  int nthreads;
2393 
2394  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2395  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2396 
2397  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2398  // Take a task team from the task team pool
2399  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2400  if (__kmp_free_task_teams != NULL) {
2401  task_team = __kmp_free_task_teams;
2402  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2403  task_team -> tt.tt_next = NULL;
2404  }
2405  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2406  }
2407 
2408  if (task_team == NULL) {
2409  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2410  "task team for team %p\n",
2411  __kmp_gtid_from_thread( thread ), team ) );
2412  // Allocate a new task team if one is not available.
2413  // Cannot use __kmp_thread_malloc() because threads not around for
2414  // kmp_reap_task_team( ).
2415  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2416  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2417  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2418  //task_team -> tt.tt_max_threads = 0;
2419  //task_team -> tt.tt_next = NULL;
2420  }
2421 
2422  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2423 #if OMP_45_ENABLED
2424  TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2425 #endif
2426  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2427 
2428  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2429  TCW_4( task_team -> tt.tt_active, TRUE );
2430 
2431  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2432  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
2433  return task_team;
2434 }
2435 
2436 
2437 //------------------------------------------------------------------------------
2438 // __kmp_free_task_team:
2439 // Frees the task team associated with a specific thread, and adds it
2440 // to the global task team free list.
2441 
2442 void
2443 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2444 {
2445  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2446  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2447 
2448  // Put task team back on free list
2449  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2450 
2451  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2452  task_team -> tt.tt_next = __kmp_free_task_teams;
2453  TCW_PTR(__kmp_free_task_teams, task_team);
2454 
2455  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2456 }
2457 
2458 
2459 //------------------------------------------------------------------------------
2460 // __kmp_reap_task_teams:
2461 // Free all the task teams on the task team free list.
2462 // Should only be done during library shutdown.
2463 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2464 
2465 void
2466 __kmp_reap_task_teams( void )
2467 {
2468  kmp_task_team_t *task_team;
2469 
2470  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2471  // Free all task_teams on the free list
2472  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2473  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2474  __kmp_free_task_teams = task_team -> tt.tt_next;
2475  task_team -> tt.tt_next = NULL;
2476 
2477  // Free threads_data if necessary
2478  if ( task_team -> tt.tt_threads_data != NULL ) {
2479  __kmp_free_task_threads_data( task_team );
2480  }
2481  __kmp_free( task_team );
2482  }
2483  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2484  }
2485 }
2486 
2487 //------------------------------------------------------------------------------
2488 // __kmp_wait_to_unref_task_teams:
2489 // Some threads could still be in the fork barrier release code, possibly
2490 // trying to steal tasks. Wait for each thread to unreference its task team.
2491 //
2492 void
2493 __kmp_wait_to_unref_task_teams(void)
2494 {
2495  kmp_info_t *thread;
2496  kmp_uint32 spins;
2497  int done;
2498 
2499  KMP_INIT_YIELD( spins );
2500 
2501  for (;;) {
2502  done = TRUE;
2503 
2504  // TODO: GEH - this may be wrong because some sync would be necessary
2505  // in case threads are added to the pool during the traversal.
2506  // Need to verify that lock for thread pool is held when calling
2507  // this routine.
2508  for (thread = (kmp_info_t *)__kmp_thread_pool;
2509  thread != NULL;
2510  thread = thread->th.th_next_pool)
2511  {
2512 #if KMP_OS_WINDOWS
2513  DWORD exit_val;
2514 #endif
2515  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2516  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2517  __kmp_gtid_from_thread( thread ) ) );
2518  continue;
2519  }
2520 #if KMP_OS_WINDOWS
2521  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2522  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2523  thread->th.th_task_team = NULL;
2524  continue;
2525  }
2526 #endif
2527 
2528  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2529 
2530  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2531  __kmp_gtid_from_thread( thread ) ) );
2532 
2533  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2534  volatile void *sleep_loc;
2535  // If the thread is sleeping, awaken it.
2536  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2537  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2538  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2539  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2540  }
2541  }
2542  }
2543  if (done) {
2544  break;
2545  }
2546 
2547  // If we are oversubscribed,
2548  // or have waited a bit (and library mode is throughput), yield.
2549  // Pause is in the following code.
2550  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2551  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2552  }
2553 }
2554 
2555 
2556 //------------------------------------------------------------------------------
2557 // __kmp_task_team_setup: Create a task_team for the current team, but use
2558 // an already created, unused one if it already exists.
2559 void
2560 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
2561 {
2562  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2563 
2564  // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2565  // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
2566  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
2567  team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2568  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
2569  __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2570  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2571  }
2572 
2573  // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
2574  // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2575  // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
2576  // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
2577  // serialized teams.
2578  if (team->t.t_nproc > 1) {
2579  int other_team = 1 - this_thr->th.th_task_state;
2580  if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2581  team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2582  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2583  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2584  ((team != NULL) ? team->t.t_id : -1), other_team ));
2585  }
2586  else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2587  kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2588  if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2589  TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2590  TCW_4(task_team->tt.tt_found_tasks, FALSE);
2591 #if OMP_45_ENABLED
2592  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2593 #endif
2594  TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2595  TCW_4(task_team->tt.tt_active, TRUE );
2596  }
2597  // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2598  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2599  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2600  ((team != NULL) ? team->t.t_id : -1), other_team ));
2601  }
2602  }
2603 }
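
// Worked example of the parity scheme above (a sketch): with th_task_state == 0,
// t_task_team[0] is the task team for the current region (allocated here if it
// does not yet exist), while t_task_team[1] is prepared for use after the next
// barrier. __kmp_task_team_sync() below flips each thread's th_task_state after
// the barrier release, so the two slots alternate roles from region to region.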
2604 
2605 
2606 //------------------------------------------------------------------------------
2607 // __kmp_task_team_sync: Propagation of task team data from team to threads
2608 // which happens just after the release phase of a team barrier. This may be
2609 // called by any thread, but only for teams with # threads > 1.
2610 
2611 void
2612 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2613 {
2614  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2615 
2616  // Toggle the th_task_state field, to switch which task_team this thread refers to
2617  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2618  // It is now safe to propagate the task team pointer from the team struct to the current thread.
2619  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
2620  KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
2621  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2622  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2623 }
2624 
2625 
2626 //--------------------------------------------------------------------------------------------
2627 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
2628 // phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2629 // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2630 // optionally as the last argument. When wait is zero, master thread does not wait for
2631 // unfinished_threads to reach 0.
2632 void
2633 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
2634  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2635  , int wait)
2636 {
2637  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
2638 
2639  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2640  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2641 
2642  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
2643  if (wait) {
2644  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2645  __kmp_gtid_from_thread(this_thr), task_team));
2646  // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2647  // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2648  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2649  flag.wait(this_thr, TRUE
2650  USE_ITT_BUILD_ARG(itt_sync_obj));
2651  }
2652  // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2653  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2654  "setting active to false, setting local and team's pointer to NULL\n",
2655  __kmp_gtid_from_thread(this_thr), task_team));
2656 #if OMP_45_ENABLED
2657  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2658  TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2659 #else
2660  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2661 #endif
2662  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2663  KMP_MB();
2664 
2665  TCW_PTR(this_thr->th.th_task_team, NULL);
2666  }
2667 }
2668 
2669 
2670 //------------------------------------------------------------------------------
2671 // __kmp_tasking_barrier:
2672 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2673 // Internal function to execute all tasks prior to a regular barrier or a
2674 // join barrier. It is a full barrier itself, which unfortunately turns
2675 // regular barriers into double barriers and join barriers into 1 1/2
2676 // barriers.
2677 void
2678 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2679 {
2680  volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2681  int flag = FALSE;
2682  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2683 
2684 #if USE_ITT_BUILD
2685  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2686 #endif /* USE_ITT_BUILD */
2687  kmp_flag_32 spin_flag(spin, 0U);
2688  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2689  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2690 #if USE_ITT_BUILD
2691  // TODO: What about itt_sync_obj??
2692  KMP_FSYNC_SPIN_PREPARE( spin );
2693 #endif /* USE_ITT_BUILD */
2694 
2695  if( TCR_4(__kmp_global.g.g_done) ) {
2696  if( __kmp_global.g.g_abort )
2697  __kmp_abort_thread( );
2698  break;
2699  }
2700  KMP_YIELD( TRUE ); // GH: We always yield here
2701  }
2702 #if USE_ITT_BUILD
2703  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2704 #endif /* USE_ITT_BUILD */
2705 }
2706 
2707 
2708 #if OMP_45_ENABLED
2709 
2710 /* __kmp_give_task puts a task into a given thread queue if:
2711  - the queue for that thread was created
2712  - there's space in that queue
2713 
2714  Because of this, __kmp_push_task needs to check if there's space after getting the lock
2715  */
2716 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
2717 {
2718  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2719  kmp_task_team_t * task_team = taskdata->td_task_team;
2720 
2721  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2722 
2723  // If task_team is NULL, something has gone really wrong...
2724  KMP_DEBUG_ASSERT( task_team != NULL );
2725 
2726  bool result = false;
2727  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2728 
2729  if (thread_data -> td.td_deque == NULL ) {
2730  // There's no queue in this thread, go find another one
2731  // We're guaranteed that at least one thread has a queue
2732  KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2733  return result;
2734  }
2735 
2736  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2737  {
2738  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2739 
2740  // if this deque is bigger than the pass ratio give a chance to another thread
2741  if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2742 
2743  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2744  __kmp_realloc_task_deque(thread,thread_data);
2745 
2746  } else {
2747 
2748  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2749 
2750  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2751  {
2752  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2753 
2754  // if this deque is bigger than the pass ratio give a chance to another thread
2755  if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2756  goto release_and_exit;
2757 
2758  __kmp_realloc_task_deque(thread,thread_data);
2759  }
2760  }
2761 
2762  // lock is held here, and there is space in the deque
2763 
2764  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2765  // Wrap index.
2766  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
2767  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2768 
2769  result = true;
2770  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2771 
2772 release_and_exit:
2773  __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2774 
2775  return result;
2776 }
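
// A minimal illustrative sketch (hypothetical toy_* names) of the deque push
// done above: the capacity is a power of two, so the tail index wraps with a
// mask. When the deque is full the real code either grows it with
// __kmp_realloc_task_deque or returns the task so the caller can try another
// thread, depending on the "pass" ratio.
#if 0
#include <stdbool.h>
#include <stddef.h>

typedef struct toy_deque {
    void   **buf;
    size_t   capacity;     /* power of two */
    size_t   head, tail;   /* owner pops from head, anyone pushes at tail */
    size_t   ntasks;
} toy_deque_t;

static bool toy_deque_push( toy_deque_t *d, void *task )
{
    if ( d->ntasks >= d->capacity )
        return false;                                  /* full: caller decides */
    d->buf[ d->tail ] = task;
    d->tail = ( d->tail + 1 ) & ( d->capacity - 1 );   /* wrap with mask */
    d->ntasks++;
    return true;
}
#endif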
2777 
2778 
2779 /* The finish of a proxy task is divided in two pieces:
2780  - the top half is the part that can be done from a thread outside the team
2781  - the bottom half must be run from a thread within the team
2782 
2783  In order to run the bottom half, the task gets queued back into one of the threads of the team.
2784  Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2785  So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
2786  - things that can be run before queuing the bottom half
2787  - things that must be run after queuing the bottom half
2788 
2789  This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this
2790  we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2791 */
2792 
2793 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2794 {
2795  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2796  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2797  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2798  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2799 
2800  taskdata -> td_flags.complete = 1; // mark the task as completed
2801 
2802  if ( taskdata->td_taskgroup )
2803  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2804 
2805  // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2806  TCI_4(taskdata->td_incomplete_child_tasks);
2807 }
2808 
2809 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2810 {
2811  kmp_int32 children = 0;
2812 
2813  // Predecrement simulated by "- 1" calculation
2814  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2815  KMP_DEBUG_ASSERT( children >= 0 );
2816 
2817  // Remove the imaginary child
2818  TCD_4(taskdata->td_incomplete_child_tasks);
2819 }
2820 
2821 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2822 {
2823  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2824  kmp_info_t * thread = __kmp_threads[ gtid ];
2825 
2826  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2827  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2828 
2829  // We need to wait to make sure the top half is finished
2830  // Spinning here should be ok as this should happen quickly
2831  while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2832 
2833  __kmp_release_deps(gtid,taskdata);
2834  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2835 }
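
// A minimal illustrative sketch (C11 atomics, hypothetical toy_* names) of the
// top/bottom half protocol described in the comment above
// __kmp_first_top_half_finish_proxy: the first top half adds an extra
// ("imaginary") child, so the bottom half, possibly running on another thread,
// cannot free the task until the second top half has dropped that child again.
#if 0
#include <stdatomic.h>

typedef struct toy_proxy_task {
    atomic_int incomplete_children;
} toy_proxy_task_t;

static void toy_first_top_half( toy_proxy_task_t *t )
{
    atomic_fetch_add( &t->incomplete_children, 1 );   /* add imaginary child */
}

static void toy_second_top_half( toy_proxy_task_t *t )
{
    atomic_fetch_sub( &t->incomplete_children, 1 );   /* drop imaginary child */
}

static void toy_bottom_half( toy_proxy_task_t *t )
{
    /* wait until the second top half is done; only then is it safe to
       release dependences and free the task */
    while ( atomic_load( &t->incomplete_children ) > 0 )
        ;
}
#endif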
2836 
2844 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2845 {
2846  KMP_DEBUG_ASSERT( ptask != NULL );
2847  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2848  KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2849 
2850  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2851 
2852  __kmp_first_top_half_finish_proxy(taskdata);
2853  __kmp_second_top_half_finish_proxy(taskdata);
2854  __kmp_bottom_half_finish_proxy(gtid,ptask);
2855 
2856  KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2857 }
2858 
2865 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2866 {
2867  KMP_DEBUG_ASSERT( ptask != NULL );
2868  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2869 
2870  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2871 
2872  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2873 
2874  __kmp_first_top_half_finish_proxy(taskdata);
2875 
2876  // Enqueue task to complete bottom half completion from a thread within the corresponding team
2877  kmp_team_t * team = taskdata->td_team;
2878  kmp_int32 nthreads = team->t.t_nproc;
2879  kmp_info_t *thread;
2880 
2881  //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2882  kmp_int32 start_k = 0;
2883  kmp_int32 pass = 1;
2884  kmp_int32 k = start_k;
2885 
2886  do {
2887  //For now we're just linearly trying to find a thread
2888  thread = team->t.t_threads[k];
2889  k = (k+1) % nthreads;
2890 
2891  // we did a full pass through all the threads
2892  if ( k == start_k ) pass = pass << 1;
2893 
2894  } while ( !__kmp_give_task( thread, k, ptask, pass ) );
2895 
2896  __kmp_second_top_half_finish_proxy(taskdata);
2897 
2898  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2899 }
2900 
2901 //---------------------------------------------------------------------------------
2902 // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2903 //
2904 // thread: allocating thread
2905 // task_src: pointer to source task to be duplicated
2906 // returns: a pointer to the allocated kmp_task_t structure (task).
2907 kmp_task_t *
2908 __kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2909 {
2910  kmp_task_t *task;
2911  kmp_taskdata_t *taskdata;
2912  kmp_taskdata_t *taskdata_src;
2913  kmp_taskdata_t *parent_task = thread->th.th_current_task;
2914  size_t shareds_offset;
2915  size_t task_size;
2916 
2917  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2918  taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2919  KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
2920  KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2921  task_size = taskdata_src->td_size_alloc;
2922 
2923  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2924  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2925  #if USE_FAST_MEMORY
2926  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2927  #else
2928  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2929  #endif /* USE_FAST_MEMORY */
2930  KMP_MEMCPY(taskdata, taskdata_src, task_size);
2931 
2932  task = KMP_TASKDATA_TO_TASK(taskdata);
2933 
2934  // Initialize new task (only specific fields not affected by memcpy)
2935  taskdata->td_task_id = KMP_GEN_TASK_ID();
2936  if( task->shareds != NULL ) { // need to set up the shareds pointer
2937  shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2938  task->shareds = &((char*)taskdata)[shareds_offset];
2939  KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2940  }
2941  taskdata->td_alloc_thread = thread;
2942  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2943 
2944  // Only need to keep track of child task counts if team parallel and tasking not serialized
2945  if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2946  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2947  if ( parent_task->td_taskgroup )
2948  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2949  // Only need to keep track of allocated child tasks for explicit tasks, since implicit tasks are not deallocated
2950  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2951  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2952  }
2953 
2954  KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2955  thread, taskdata, taskdata->td_parent) );
2956 #if OMPT_SUPPORT
2957  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2958 #endif
2959  return task;
2960 }
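
// A minimal illustrative sketch (hypothetical toy_* names) of the pointer
// fixup performed above: after a flat memcpy of a block that contains a
// pointer into itself, that pointer must be rebased as (new base + old
// offset), exactly like task->shareds.
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct toy_task {
    void *shareds;        /* points somewhere inside the same allocation */
    char  payload[ 64 ];
} toy_task_t;

static toy_task_t * toy_task_dup( const toy_task_t *src, size_t alloc_size )
{
    toy_task_t *dst = (toy_task_t *)malloc( alloc_size );
    if ( dst == NULL )
        return NULL;
    memcpy( dst, src, alloc_size );
    if ( src->shareds != NULL ) {
        size_t offset = (const char *)src->shareds - (const char *)src;
        dst->shareds = (char *)dst + offset;   /* rebase interior pointer */
    }
    return dst;
}
#endif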
2961 
2962 // Routine optionally generated by the compiler for setting the lastprivate flag
2963 // and calling needed constructors for private/firstprivate objects
2964 // (used to form taskloop tasks from pattern task)
2965 typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2966 
2967 //---------------------------------------------------------------------------------
2968 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
2969 //
2970 // loc Source location information
2971 // gtid Global thread ID
2972 // task Task with whole loop iteration range
2973 // lb Pointer to loop lower bound
2974 // ub Pointer to loop upper bound
2975 // st Loop stride
2976 // sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2977 // grainsize Schedule value if specified
2978 // task_dup Tasks duplication routine
2979 void
2980 __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2981  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2982  int sched, kmp_uint64 grainsize, void *task_dup )
2983 {
2984  KMP_COUNT_BLOCK(OMP_TASKLOOP);
2985  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
2986  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2987  kmp_uint64 tc;
2988  kmp_uint64 lower = *lb; // compiler provides global bounds here
2989  kmp_uint64 upper = *ub;
2990  kmp_uint64 i, num_tasks = 0, extras = 0;
2991  kmp_info_t *thread = __kmp_threads[gtid];
2992  kmp_taskdata_t *current_task = thread->th.th_current_task;
2993  kmp_task_t *next_task;
2994  kmp_int32 lastpriv = 0;
2995  size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2996  size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2997 
2998  // compute trip count
2999  if ( st == 1 ) { // most common case
3000  tc = upper - lower + 1;
3001  } else if ( st < 0 ) {
3002  tc = (lower - upper) / (-st) + 1;
3003  } else { // st > 0
3004  tc = (upper - lower) / st + 1;
3005  }
3006  if(tc == 0) {
3007  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
3008  // free the pattern task and exit
3009  __kmp_task_start( gtid, task, current_task );
3010  // do not execute anything for zero-trip loop
3011  __kmp_task_finish( gtid, task, current_task );
3012  return;
3013  }
3014 
3015  // compute num_tasks/grainsize based on the input provided
3016  switch( sched ) {
3017  case 0: // no schedule clause specified, we can choose the default
3018  // let's try to schedule (team_size*10) tasks
3019  grainsize = thread->th.th_team_nproc * 10; // intentional fall-through to the num_tasks case below
3020  case 2: // num_tasks provided
3021  if( grainsize > tc ) {
3022  num_tasks = tc; // too big num_tasks requested, adjust values
3023  grainsize = 1;
3024  extras = 0;
3025  } else {
3026  num_tasks = grainsize;
3027  grainsize = tc / num_tasks;
3028  extras = tc % num_tasks;
3029  }
3030  break;
3031  case 1: // grainsize provided
3032  if( grainsize > tc ) {
3033  num_tasks = 1; // too big grainsize requested, adjust values
3034  grainsize = tc;
3035  extras = 0;
3036  } else {
3037  num_tasks = tc / grainsize;
3038  grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3039  extras = tc % num_tasks;
3040  }
3041  break;
3042  default:
3043  KMP_ASSERT2(0, "unknown scheduling of taskloop");
3044  }
3045  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3046  KMP_DEBUG_ASSERT(num_tasks > extras);
3047  KMP_DEBUG_ASSERT(num_tasks > 0);
3048  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
3049  gtid, num_tasks, grainsize, extras));
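 // Worked example (illustrative): for tc = 1000 iterations with a grainsize
 // clause of 300 (sched == 1), num_tasks = 1000/300 = 3, the adjusted
 // grainsize = 1000/3 = 333 and extras = 1000%3 = 1, so the chunk sizes are
 // 334, 333, 333 (the first `extras` tasks get one extra iteration), and
 // tc == num_tasks*grainsize + extras holds: 3*333 + 1 == 1000.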
3050 
3051  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3052  for( i = 0; i < num_tasks; ++i ) {
3053  kmp_uint64 chunk_minus_1;
3054  if( extras == 0 ) {
3055  chunk_minus_1 = grainsize - 1;
3056  } else {
3057  chunk_minus_1 = grainsize;
3058  --extras; // first extras iterations get bigger chunk (grainsize+1)
3059  }
3060  upper = lower + st * chunk_minus_1;
3061  if( i == num_tasks - 1 ) {
3062  // schedule the last task, set lastprivate flag
3063  lastpriv = 1;
3064 #if KMP_DEBUG
3065  if( st == 1 )
3066  KMP_DEBUG_ASSERT(upper == *ub);
3067  else if( st > 0 )
3068  KMP_DEBUG_ASSERT(upper+st > *ub);
3069  else
3070  KMP_DEBUG_ASSERT(upper+st < *ub);
3071 #endif
3072  }
3073  next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3074  *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3075  *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3076  if( ptask_dup != NULL )
3077  ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3078  KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
3079  gtid, next_task, lower, upper, lower_offset, upper_offset));
3080  __kmp_omp_task(gtid, next_task, true); // schedule new task
3081  lower = upper + st; // adjust lower bound for the next iteration
3082  }
3083  // free the pattern task and exit
3084  __kmp_task_start( gtid, task, current_task );
3085  // do not execute the pattern task, just do bookkeeping
3086  __kmp_task_finish( gtid, task, current_task );
3087 }
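
// An illustrative user-level view (assumed usage, not from this file) of what
// __kmp_taskloop_linear implements: the compiler lowers a taskloop such as the
// one below into a pattern task whose iteration range is then split into
// per-task chunks by the code above.
#if 0
void toy_scale( double *a, int n )
{
    #pragma omp parallel
    #pragma omp single
    #pragma omp taskloop grainsize(64)
    for ( int i = 0; i < n; i++ )
        a[ i ] *= 2.0;
}
#endif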
3088 
3105 void
3106 __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3107  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3108  int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3109 {
3110  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3111  KMP_DEBUG_ASSERT( task != NULL );
3112 
3113  KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3114  gtid, taskdata, *lb, *ub, st, grainsize, sched));
3115 
3116  // check the if-clause value first
3117  if( if_val == 0 ) { // if(0) specified, mark task as serial
3118  taskdata->td_flags.task_serial = 1;
3119  taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3120  }
3121  if( nogroup == 0 ) {
3122  __kmpc_taskgroup( loc, gtid );
3123  }
3124 
3125  if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3126  __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3127  }
3128 
3129  if( nogroup == 0 ) {
3130  __kmpc_end_taskgroup( loc, gtid );
3131  }
3132  KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3133 }
3134 
3135 #endif