1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 #include <atomic>
18 
19 #include "kmp.h"
20 #include "kmp_itt.h"
21 #include "kmp_i18n.h"
22 #include "kmp_lock.h"
23 #include "kmp_io.h"
24 
25 #if KMP_USE_FUTEX
26 # include <unistd.h>
27 # include <sys/syscall.h>
28 // We should really include <futex.h>, but that causes compatibility problems on different
29 // Linux* OS distributions that either require that you include (or break when you try to include)
30 // <pci/types.h>.
31 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
32 // we just define the constants here and don't include <futex.h>
33 # ifndef FUTEX_WAIT
34 # define FUTEX_WAIT 0
35 # endif
36 # ifndef FUTEX_WAKE
37 # define FUTEX_WAKE 1
38 # endif
39 #endif
40 
41 /* Implement spin locks for internal library use. */
42 /* The algorithm implemented is Lamport's bakery lock [1974]. */
43 
44 void
45 __kmp_validate_locks( void )
46 {
47  int i;
48  kmp_uint32 x, y;
49 
50  /* Check to make sure unsigned arithmetic wraps properly */
51  x = ~((kmp_uint32) 0) - 2;
52  y = x - 2;
53 
54  for (i = 0; i < 8; ++i, ++x, ++y) {
55  kmp_uint32 z = (x - y);
56  KMP_ASSERT( z == 2 );
57  }
58 
59  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
60 }
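//
// Editor's sketch (illustration only, not part of the runtime): the wraparound
// property verified above is what lets a ticket-style (bakery) lock keep
// working after its counters overflow -- (next_ticket - now_serving) still
// yields the number of waiters.  All names below are hypothetical.
//
#if 0
#include <atomic>
#include <cstdint>

struct example_ticket_lock {
    std::atomic<std::uint32_t> next_ticket{0};
    std::atomic<std::uint32_t> now_serving{0};

    void acquire() {
        // Take a ticket, then wait until it is the one being served.
        std::uint32_t my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
        while (now_serving.load(std::memory_order_acquire) != my_ticket)
            ; // spin (a real implementation would yield/back off here)
    }

    void release() {
        now_serving.fetch_add(1, std::memory_order_release);
    }

    // Threads that hold a ticket but have not been served yet; correct even
    // after the 32-bit counters wrap, exactly what the check above verifies.
    std::uint32_t waiters() const {
        return next_ticket.load(std::memory_order_relaxed) -
               now_serving.load(std::memory_order_relaxed);
    }
};
#endif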
61 
62 
63 /* ------------------------------------------------------------------------ */
64 /* test and set locks */
65 
66 //
67 // For the non-nested locks, we can only assume that the first 4 bytes were
68 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
69 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
70 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
71 //
72 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
73 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
74 //
75 
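//
// Editor's sketch (illustration only): the entire state of a non-nested
// test-and-set lock fits in those 4 bytes -- 0 means free, gtid+1 identifies
// the owner -- so acquisition is a single 32-bit compare-and-swap.  The
// helper names below are hypothetical.
//
#if 0
#include <atomic>
#include <cstdint>

static std::atomic<std::int32_t> example_poll{0};        // 0 == unlocked

static bool example_try_acquire(std::int32_t gtid) {
    std::int32_t expected = 0;
    // Succeeds only if the word is still 0; stores gtid+1 as the owner tag.
    return example_poll.compare_exchange_strong(expected, gtid + 1,
                                                std::memory_order_acquire,
                                                std::memory_order_relaxed);
}

static std::int32_t example_owner() {                    // -1 == no owner
    return example_poll.load(std::memory_order_relaxed) - 1;
}

static void example_release() {
    example_poll.store(0, std::memory_order_release);
}
#endif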
76 static kmp_int32
77 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
78 {
79  return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
80 }
81 
82 static inline bool
83 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
84 {
85  return lck->lk.depth_locked != -1;
86 }
87 
88 __forceinline static int
89 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
90 {
91  KMP_MB();
92 
93 #ifdef USE_LOCK_PROFILE
94  kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
95  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
96  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
97  /* else __kmp_printf( "." );*/
98 #endif /* USE_LOCK_PROFILE */
99 
100  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
101  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
102  KMP_FSYNC_ACQUIRED(lck);
103  return KMP_LOCK_ACQUIRED_FIRST;
104  }
105 
106  kmp_uint32 spins;
107  KMP_FSYNC_PREPARE( lck );
108  KMP_INIT_YIELD( spins );
109  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
110  __kmp_xproc ) ) {
111  KMP_YIELD( TRUE );
112  }
113  else {
114  KMP_YIELD_SPIN( spins );
115  }
116 
117  kmp_backoff_t backoff = __kmp_spin_backoff_params;
118  while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
119  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
120 
121  __kmp_spin_backoff(&backoff);
122  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
123  __kmp_xproc ) ) {
124  KMP_YIELD( TRUE );
125  }
126  else {
127  KMP_YIELD_SPIN( spins );
128  }
129  }
130  KMP_FSYNC_ACQUIRED( lck );
131  return KMP_LOCK_ACQUIRED_FIRST;
132 }
133 
134 int
135 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
136 {
137  return __kmp_acquire_tas_lock_timed_template( lck, gtid );
138 }
139 
140 static int
141 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
142 {
143  char const * const func = "omp_set_lock";
144  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
145  && __kmp_is_tas_lock_nestable( lck ) ) {
146  KMP_FATAL( LockNestableUsedAsSimple, func );
147  }
148  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
149  KMP_FATAL( LockIsAlreadyOwned, func );
150  }
151  return __kmp_acquire_tas_lock( lck, gtid );
152 }
153 
154 int
155 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
156 {
157  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
158  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
159  KMP_FSYNC_ACQUIRED( lck );
160  return TRUE;
161  }
162  return FALSE;
163 }
164 
165 static int
166 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
167 {
168  char const * const func = "omp_test_lock";
169  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
170  && __kmp_is_tas_lock_nestable( lck ) ) {
171  KMP_FATAL( LockNestableUsedAsSimple, func );
172  }
173  return __kmp_test_tas_lock( lck, gtid );
174 }
175 
176 int
177 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
178 {
179  KMP_MB(); /* Flush all pending memory write invalidates. */
180 
181  KMP_FSYNC_RELEASING(lck);
182  KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
183  KMP_MB(); /* Flush all pending memory write invalidates. */
184 
185  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
186  __kmp_xproc ) );
187  return KMP_LOCK_RELEASED;
188 }
189 
190 static int
191 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
192 {
193  char const * const func = "omp_unset_lock";
194  KMP_MB(); /* in case another processor initialized lock */
195  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
196  && __kmp_is_tas_lock_nestable( lck ) ) {
197  KMP_FATAL( LockNestableUsedAsSimple, func );
198  }
199  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
200  KMP_FATAL( LockUnsettingFree, func );
201  }
202  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
203  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
204  KMP_FATAL( LockUnsettingSetByAnother, func );
205  }
206  return __kmp_release_tas_lock( lck, gtid );
207 }
208 
209 void
210 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
211 {
212  TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
213 }
214 
215 static void
216 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
217 {
218  __kmp_init_tas_lock( lck );
219 }
220 
221 void
222 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
223 {
224  lck->lk.poll = 0;
225 }
226 
227 static void
228 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
229 {
230  char const * const func = "omp_destroy_lock";
231  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
232  && __kmp_is_tas_lock_nestable( lck ) ) {
233  KMP_FATAL( LockNestableUsedAsSimple, func );
234  }
235  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
236  KMP_FATAL( LockStillOwned, func );
237  }
238  __kmp_destroy_tas_lock( lck );
239 }
240 
241 
242 //
243 // nested test and set locks
244 //
245 
246 int
247 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
248 {
249  KMP_DEBUG_ASSERT( gtid >= 0 );
250 
251  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
252  lck->lk.depth_locked += 1;
253  return KMP_LOCK_ACQUIRED_NEXT;
254  }
255  else {
256  __kmp_acquire_tas_lock_timed_template( lck, gtid );
257  lck->lk.depth_locked = 1;
258  return KMP_LOCK_ACQUIRED_FIRST;
259  }
260 }
261 
262 static int
263 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
264 {
265  char const * const func = "omp_set_nest_lock";
266  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
267  KMP_FATAL( LockSimpleUsedAsNestable, func );
268  }
269  return __kmp_acquire_nested_tas_lock( lck, gtid );
270 }
271 
272 int
273 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
274 {
275  int retval;
276 
277  KMP_DEBUG_ASSERT( gtid >= 0 );
278 
279  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
280  retval = ++lck->lk.depth_locked;
281  }
282  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
283  retval = 0;
284  }
285  else {
286  KMP_MB();
287  retval = lck->lk.depth_locked = 1;
288  }
289  return retval;
290 }
291 
292 static int
293 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
294 {
295  char const * const func = "omp_test_nest_lock";
296  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
297  KMP_FATAL( LockSimpleUsedAsNestable, func );
298  }
299  return __kmp_test_nested_tas_lock( lck, gtid );
300 }
301 
302 int
303 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
304 {
305  KMP_DEBUG_ASSERT( gtid >= 0 );
306 
307  KMP_MB();
308  if ( --(lck->lk.depth_locked) == 0 ) {
309  __kmp_release_tas_lock( lck, gtid );
310  return KMP_LOCK_RELEASED;
311  }
312  return KMP_LOCK_STILL_HELD;
313 }
314 
315 static int
316 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
317 {
318  char const * const func = "omp_unset_nest_lock";
319  KMP_MB(); /* in case another processor initialized lock */
320  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
321  KMP_FATAL( LockSimpleUsedAsNestable, func );
322  }
323  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
324  KMP_FATAL( LockUnsettingFree, func );
325  }
326  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
327  KMP_FATAL( LockUnsettingSetByAnother, func );
328  }
329  return __kmp_release_nested_tas_lock( lck, gtid );
330 }
331 
332 void
333 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
334 {
335  __kmp_init_tas_lock( lck );
336  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
337 }
338 
339 static void
340 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
341 {
342  __kmp_init_nested_tas_lock( lck );
343 }
344 
345 void
346 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
347 {
348  __kmp_destroy_tas_lock( lck );
349  lck->lk.depth_locked = 0;
350 }
351 
352 static void
353 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
354 {
355  char const * const func = "omp_destroy_nest_lock";
356  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
357  KMP_FATAL( LockSimpleUsedAsNestable, func );
358  }
359  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
360  KMP_FATAL( LockStillOwned, func );
361  }
362  __kmp_destroy_nested_tas_lock( lck );
363 }
364 
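//
// Editor's note (illustration only): at the OpenMP API level, the depth
// counting above is what lets the owning thread re-acquire a nest lock;
// the lock only becomes available to other threads once every set has been
// matched by an unset.
//
#if 0
#include <omp.h>
#include <stdio.h>

static omp_nest_lock_t example_nlock;

static void example_nested_usage(void) {
    omp_init_nest_lock(&example_nlock);

    omp_set_nest_lock(&example_nlock);                       // depth 1
    omp_set_nest_lock(&example_nlock);                       // same thread: depth 2
    printf("%d\n", omp_test_nest_lock(&example_nlock));      // prints 3

    omp_unset_nest_lock(&example_nlock);                     // depth 2
    omp_unset_nest_lock(&example_nlock);                     // depth 1
    omp_unset_nest_lock(&example_nlock);                     // depth 0 -- released
    omp_destroy_nest_lock(&example_nlock);
}
#endif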
365 
366 #if KMP_USE_FUTEX
367 
368 /* ------------------------------------------------------------------------ */
369 /* futex locks */
370 
371 // futex locks are really just test and set locks, with a different method
372 // of handling contention. They take the same amount of space as test and
373 // set locks, and are allocated the same way (i.e. use the area allocated by
374 // the compiler for non-nested locks / allocate nested locks on the heap).
375 
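//
// Editor's sketch (illustration only): the futex lock keeps all of its state
// in one 32-bit word -- 0 means free, (gtid+1) << 1 means held, and the low
// bit is set once a waiter has gone to sleep in the kernel so that the
// releaser knows a FUTEX_WAKE is needed.  The helper names are hypothetical.
//
#if 0
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static inline int32_t example_encode_owner(int32_t gtid)  { return (gtid + 1) << 1; }
static inline int32_t example_decode_owner(int32_t poll)  { return (poll >> 1) - 1; }
static inline int     example_has_sleepers(int32_t poll)  { return poll & 1; }

// Contended acquire path: sleep until *addr changes from 'expected' ...
static inline long example_futex_wait(int32_t *addr, int32_t expected) {
    return syscall(__NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0);
}
// ... and release path: wake one sleeper when the low bit was set.
static inline long example_futex_wake_one(int32_t *addr) {
    return syscall(__NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif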
376 static kmp_int32
377 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
378 {
379  return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
380 }
381 
382 static inline bool
383 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
384 {
385  return lck->lk.depth_locked != -1;
386 }
387 
388 __forceinline static int
389 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
390 {
391  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
392 
393  KMP_MB();
394 
395 #ifdef USE_LOCK_PROFILE
396  kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
397  if ( ( curr != 0 ) && ( curr != gtid_code ) )
398  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
399  /* else __kmp_printf( "." );*/
400 #endif /* USE_LOCK_PROFILE */
401 
402  KMP_FSYNC_PREPARE( lck );
403  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
404  lck, lck->lk.poll, gtid ) );
405 
406  kmp_int32 poll_val;
407 
408  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
409  KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
410 
411  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
412  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
413  lck, gtid, poll_val, cond ) );
414 
415  //
416  // NOTE: do not write this branch condition as
417  //
418  //     if ( poll_val & 1 == 0 )
419  //
420  // In C/C++, == binds tighter than &, so that expression parses as
421  // poll_val & ( 1 == 0 ), which is always 0 and the block would always be skipped.
422  //
423  if ( ! cond ) {
424  //
425  // Try to set the lsb in the poll to indicate to the owner
426  // thread that they need to wake this thread up.
427  //
428  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
429  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
430  lck, lck->lk.poll, gtid ) );
431  continue;
432  }
433  poll_val |= KMP_LOCK_BUSY(1, futex);
434 
435  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
436  lck, lck->lk.poll, gtid ) );
437  }
438 
439  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
440  lck, gtid, poll_val ) );
441 
442  kmp_int32 rc;
443  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
444  poll_val, NULL, NULL, 0 ) ) != 0 ) {
445  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
446  lck, gtid, poll_val, rc, errno ) );
447  continue;
448  }
449 
450  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
451  lck, gtid, poll_val ) );
452  //
453  // This thread has now done a successful futex wait call and was
454  // entered on the OS futex queue. We must now perform a futex
455  // wake call when releasing the lock, as we have no idea how many
456  // other threads are in the queue.
457  //
458  gtid_code |= 1;
459  }
460 
461  KMP_FSYNC_ACQUIRED( lck );
462  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
463  lck, lck->lk.poll, gtid ) );
464  return KMP_LOCK_ACQUIRED_FIRST;
465 }
466 
467 int
468 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
469 {
470  return __kmp_acquire_futex_lock_timed_template( lck, gtid );
471 }
472 
473 static int
474 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
475 {
476  char const * const func = "omp_set_lock";
477  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
478  && __kmp_is_futex_lock_nestable( lck ) ) {
479  KMP_FATAL( LockNestableUsedAsSimple, func );
480  }
481  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
482  KMP_FATAL( LockIsAlreadyOwned, func );
483  }
484  return __kmp_acquire_futex_lock( lck, gtid );
485 }
486 
487 int
488 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
489 {
490  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
491  KMP_FSYNC_ACQUIRED( lck );
492  return TRUE;
493  }
494  return FALSE;
495 }
496 
497 static int
498 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
499 {
500  char const * const func = "omp_test_lock";
501  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
502  && __kmp_is_futex_lock_nestable( lck ) ) {
503  KMP_FATAL( LockNestableUsedAsSimple, func );
504  }
505  return __kmp_test_futex_lock( lck, gtid );
506 }
507 
508 int
509 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
510 {
511  KMP_MB(); /* Flush all pending memory write invalidates. */
512 
513  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
514  lck, lck->lk.poll, gtid ) );
515 
516  KMP_FSYNC_RELEASING(lck);
517 
518  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
519 
520  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
521  lck, gtid, poll_val ) );
522 
523  if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
524  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
525  lck, gtid ) );
526  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
527  }
528 
529  KMP_MB(); /* Flush all pending memory write invalidates. */
530 
531  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
532  lck, lck->lk.poll, gtid ) );
533 
534  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
535  __kmp_xproc ) );
536  return KMP_LOCK_RELEASED;
537 }
538 
539 static int
540 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
541 {
542  char const * const func = "omp_unset_lock";
543  KMP_MB(); /* in case another processor initialized lock */
544  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
545  && __kmp_is_futex_lock_nestable( lck ) ) {
546  KMP_FATAL( LockNestableUsedAsSimple, func );
547  }
548  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
549  KMP_FATAL( LockUnsettingFree, func );
550  }
551  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
552  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
553  KMP_FATAL( LockUnsettingSetByAnother, func );
554  }
555  return __kmp_release_futex_lock( lck, gtid );
556 }
557 
558 void
559 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
560 {
561  TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
562 }
563 
564 static void
565 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
566 {
567  __kmp_init_futex_lock( lck );
568 }
569 
570 void
571 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
572 {
573  lck->lk.poll = 0;
574 }
575 
576 static void
577 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
578 {
579  char const * const func = "omp_destroy_lock";
580  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
581  && __kmp_is_futex_lock_nestable( lck ) ) {
582  KMP_FATAL( LockNestableUsedAsSimple, func );
583  }
584  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
585  KMP_FATAL( LockStillOwned, func );
586  }
587  __kmp_destroy_futex_lock( lck );
588 }
589 
590 
591 //
592 // nested futex locks
593 //
594 
595 int
596 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
597 {
598  KMP_DEBUG_ASSERT( gtid >= 0 );
599 
600  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
601  lck->lk.depth_locked += 1;
602  return KMP_LOCK_ACQUIRED_NEXT;
603  }
604  else {
605  __kmp_acquire_futex_lock_timed_template( lck, gtid );
606  lck->lk.depth_locked = 1;
607  return KMP_LOCK_ACQUIRED_FIRST;
608  }
609 }
610 
611 static int
612 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
613 {
614  char const * const func = "omp_set_nest_lock";
615  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
616  KMP_FATAL( LockSimpleUsedAsNestable, func );
617  }
618  return __kmp_acquire_nested_futex_lock( lck, gtid );
619 }
620 
621 int
622 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
623 {
624  int retval;
625 
626  KMP_DEBUG_ASSERT( gtid >= 0 );
627 
628  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
629  retval = ++lck->lk.depth_locked;
630  }
631  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
632  retval = 0;
633  }
634  else {
635  KMP_MB();
636  retval = lck->lk.depth_locked = 1;
637  }
638  return retval;
639 }
640 
641 static int
642 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
643 {
644  char const * const func = "omp_test_nest_lock";
645  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
646  KMP_FATAL( LockSimpleUsedAsNestable, func );
647  }
648  return __kmp_test_nested_futex_lock( lck, gtid );
649 }
650 
651 int
652 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654  KMP_DEBUG_ASSERT( gtid >= 0 );
655 
656  KMP_MB();
657  if ( --(lck->lk.depth_locked) == 0 ) {
658  __kmp_release_futex_lock( lck, gtid );
659  return KMP_LOCK_RELEASED;
660  }
661  return KMP_LOCK_STILL_HELD;
662 }
663 
664 static int
665 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
666 {
667  char const * const func = "omp_unset_nest_lock";
668  KMP_MB(); /* in case another processor initialized lock */
669  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
670  KMP_FATAL( LockSimpleUsedAsNestable, func );
671  }
672  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
673  KMP_FATAL( LockUnsettingFree, func );
674  }
675  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
676  KMP_FATAL( LockUnsettingSetByAnother, func );
677  }
678  return __kmp_release_nested_futex_lock( lck, gtid );
679 }
680 
681 void
682 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
683 {
684  __kmp_init_futex_lock( lck );
685  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
686 }
687 
688 static void
689 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
690 {
691  __kmp_init_nested_futex_lock( lck );
692 }
693 
694 void
695 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
696 {
697  __kmp_destroy_futex_lock( lck );
698  lck->lk.depth_locked = 0;
699 }
700 
701 static void
702 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
703 {
704  char const * const func = "omp_destroy_nest_lock";
705  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
706  KMP_FATAL( LockSimpleUsedAsNestable, func );
707  }
708  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
709  KMP_FATAL( LockStillOwned, func );
710  }
711  __kmp_destroy_nested_futex_lock( lck );
712 }
713 
714 #endif // KMP_USE_FUTEX
715 
716 
717 /* ------------------------------------------------------------------------ */
718 /* ticket (bakery) locks */
719 
720 static kmp_int32
721 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
722 {
723  return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
724 }
725 
726 static inline bool
727 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
728 {
729  return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
730 }
731 
732 static kmp_uint32
733 __kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
734 {
735  return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
736 }
737 
738 __forceinline static int
739 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
740 {
741  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );
742 
743 #ifdef USE_LOCK_PROFILE
744  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
745  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
746  /* else __kmp_printf( "." );*/
747 #endif /* USE_LOCK_PROFILE */
748 
749  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
750  return KMP_LOCK_ACQUIRED_FIRST;
751  }
752  KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
753  return KMP_LOCK_ACQUIRED_FIRST;
754 }
755 
756 int
757 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
758 {
759  return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
760 }
761 
762 static int
763 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
764 {
765  char const * const func = "omp_set_lock";
766 
767  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
768  KMP_FATAL( LockIsUninitialized, func );
769  }
770  if ( lck->lk.self != lck ) {
771  KMP_FATAL( LockIsUninitialized, func );
772  }
773  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
774  KMP_FATAL( LockNestableUsedAsSimple, func );
775  }
776  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
777  KMP_FATAL( LockIsAlreadyOwned, func );
778  }
779 
780  __kmp_acquire_ticket_lock( lck, gtid );
781 
782  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
783  return KMP_LOCK_ACQUIRED_FIRST;
784 }
785 
786 int
787 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
788 {
789  kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );
790 
791  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
792  kmp_uint32 next_ticket = my_ticket + 1;
793  if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
794  &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
795  return TRUE;
796  }
797  }
798  return FALSE;
799 }
800 
801 static int
802 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
803 {
804  char const * const func = "omp_test_lock";
805 
806  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
807  KMP_FATAL( LockIsUninitialized, func );
808  }
809  if ( lck->lk.self != lck ) {
810  KMP_FATAL( LockIsUninitialized, func );
811  }
812  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
813  KMP_FATAL( LockNestableUsedAsSimple, func );
814  }
815 
816  int retval = __kmp_test_ticket_lock( lck, gtid );
817 
818  if ( retval ) {
819  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
820  }
821  return retval;
822 }
823 
824 int
825 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
826 {
827  kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );
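    // Editor's note: this unsigned difference approximates how many threads are
    // still waiting on a ticket (it stays correct across counter wraparound,
    // which __kmp_validate_locks checks) and is used below to decide whether
    // the releasing thread should yield.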
828 
829  std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
830 
831  KMP_YIELD( distance
832  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
833  return KMP_LOCK_RELEASED;
834 }
835 
836 static int
837 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
838 {
839  char const * const func = "omp_unset_lock";
840 
841  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
842  KMP_FATAL( LockIsUninitialized, func );
843  }
844  if ( lck->lk.self != lck ) {
845  KMP_FATAL( LockIsUninitialized, func );
846  }
847  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
848  KMP_FATAL( LockNestableUsedAsSimple, func );
849  }
850  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
851  KMP_FATAL( LockUnsettingFree, func );
852  }
853  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
854  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
855  KMP_FATAL( LockUnsettingSetByAnother, func );
856  }
857  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
858  return __kmp_release_ticket_lock( lck, gtid );
859 }
860 
861 void
862 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
863 {
864  lck->lk.location = NULL;
865  lck->lk.self = lck;
866  std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
867  std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
868  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
869  std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
870  std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
871 }
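// Editor's note: the fields above are written first and lck->lk.initialized is
// published last with a release store; the *_with_checks entry points test this
// flag together with lck->lk.self == lck to detect locks that were never
// initialized or have been destroyed.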
872 
873 static void
874 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
875 {
876  __kmp_init_ticket_lock( lck );
877 }
878 
879 void
880 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
881 {
882  std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
883  lck->lk.self = NULL;
884  lck->lk.location = NULL;
885  std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
886  std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
887  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
888  std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
889 }
890 
891 static void
892 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
893 {
894  char const * const func = "omp_destroy_lock";
895 
896  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
897  KMP_FATAL( LockIsUninitialized, func );
898  }
899  if ( lck->lk.self != lck ) {
900  KMP_FATAL( LockIsUninitialized, func );
901  }
902  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903  KMP_FATAL( LockNestableUsedAsSimple, func );
904  }
905  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906  KMP_FATAL( LockStillOwned, func );
907  }
908  __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919  KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922  std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
923  return KMP_LOCK_ACQUIRED_NEXT;
924  }
925  else {
926  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927  std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
928  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
929  return KMP_LOCK_ACQUIRED_FIRST;
930  }
931 }
932 
933 static int
934 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
935 {
936  char const * const func = "omp_set_nest_lock";
937 
938  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
939  KMP_FATAL( LockIsUninitialized, func );
940  }
941  if ( lck->lk.self != lck ) {
942  KMP_FATAL( LockIsUninitialized, func );
943  }
944  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
945  KMP_FATAL( LockSimpleUsedAsNestable, func );
946  }
947  return __kmp_acquire_nested_ticket_lock( lck, gtid );
948 }
949 
950 int
951 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
952 {
953  int retval;
954 
955  KMP_DEBUG_ASSERT( gtid >= 0 );
956 
957  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
958  retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
959  }
960  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
961  retval = 0;
962  }
963  else {
964  std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
965  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
966  retval = 1;
967  }
968  return retval;
969 }
970 
971 static int
972 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
973  kmp_int32 gtid )
974 {
975  char const * const func = "omp_test_nest_lock";
976 
977  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
978  KMP_FATAL( LockIsUninitialized, func );
979  }
980  if ( lck->lk.self != lck ) {
981  KMP_FATAL( LockIsUninitialized, func );
982  }
983  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
984  KMP_FATAL( LockSimpleUsedAsNestable, func );
985  }
986  return __kmp_test_nested_ticket_lock( lck, gtid );
987 }
988 
989 int
990 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
991 {
992  KMP_DEBUG_ASSERT( gtid >= 0 );
993 
994  if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
995  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
996  __kmp_release_ticket_lock( lck, gtid );
997  return KMP_LOCK_RELEASED;
998  }
999  return KMP_LOCK_STILL_HELD;
1000 }
1001 
1002 static int
1003 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1004 {
1005  char const * const func = "omp_unset_nest_lock";
1006 
1007  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1008  KMP_FATAL( LockIsUninitialized, func );
1009  }
1010  if ( lck->lk.self != lck ) {
1011  KMP_FATAL( LockIsUninitialized, func );
1012  }
1013  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1014  KMP_FATAL( LockSimpleUsedAsNestable, func );
1015  }
1016  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1017  KMP_FATAL( LockUnsettingFree, func );
1018  }
1019  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1020  KMP_FATAL( LockUnsettingSetByAnother, func );
1021  }
1022  return __kmp_release_nested_ticket_lock( lck, gtid );
1023 }
1024 
1025 void
1026 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1027 {
1028  __kmp_init_ticket_lock( lck );
1029  std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
1030 }
1031 
1032 static void
1033 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1034 {
1035  __kmp_init_nested_ticket_lock( lck );
1036 }
1037 
1038 void
1039 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1040 {
1041  __kmp_destroy_ticket_lock( lck );
1042  std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
1043 }
1044 
1045 static void
1046 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1047 {
1048  char const * const func = "omp_destroy_nest_lock";
1049 
1050  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1051  KMP_FATAL( LockIsUninitialized, func );
1052  }
1053  if ( lck->lk.self != lck ) {
1054  KMP_FATAL( LockIsUninitialized, func );
1055  }
1056  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1057  KMP_FATAL( LockSimpleUsedAsNestable, func );
1058  }
1059  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1060  KMP_FATAL( LockStillOwned, func );
1061  }
1062  __kmp_destroy_nested_ticket_lock( lck );
1063 }
1064 
1065 
1066 //
1067 // access functions to fields which don't exist for all lock kinds.
1068 //
1069 
1070 static int
1071 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1072 {
1073  return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
1074 }
1075 
1076 static const ident_t *
1077 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1078 {
1079  return lck->lk.location;
1080 }
1081 
1082 static void
1083 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1084 {
1085  lck->lk.location = loc;
1086 }
1087 
1088 static kmp_lock_flags_t
1089 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1090 {
1091  return lck->lk.flags;
1092 }
1093 
1094 static void
1095 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1096 {
1097  lck->lk.flags = flags;
1098 }
1099 
1100 /* ------------------------------------------------------------------------ */
1101 /* queuing locks */
1102 
1103 /*
1104  * First the states
1105  * (head,tail) =              0, 0  means lock is unheld, nobody on queue
1106  *                UINT_MAX or -1, 0  means lock is held, nobody on queue
1107  *                             h, h  means lock is held or about to transition, 1 element on queue
1108  *                             h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1109  *
1110  * Now the transitions
1111  *    Acquire(0,0)  = -1 ,0
1112  *    Release(0,0)  = Error
1113  *    Acquire(-1,0) =  h ,h    h > 0
1114  *    Release(-1,0) =  0 ,0
1115  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1116  *    Release(h,h)  = -1 ,0    h > 0
1117  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1118  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1119  *
1120  * And pictorially
1121  *
1122  *
1123  *          +-----+
1124  *          | 0, 0|------- release -------> Error
1125  *          +-----+
1126  *            |  ^
1127  *     acquire|  |release
1128  *            |  |
1129  *            |  |
1130  *            v  |
1131  *          +-----+
1132  *          |-1, 0|
1133  *          +-----+
1134  *            |  ^
1135  *     acquire|  |release
1136  *            |  |
1137  *            |  |
1138  *            v  |
1139  *          +-----+
1140  *          | h, h|
1141  *          +-----+
1142  *            |  ^
1143  *     acquire|  |release
1144  *            |  |
1145  *            |  |
1146  *            v  |
1147  *          +-----+
1148  *          | h, t|----- acquire, release loopback ---+
1149  *          +-----+                                   |
1150  *             ^                                      |
1151  *             |                                      |
1152  *             +--------------------------------------+
1153  *
1154  */
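//
// Editor's sketch (illustration only): the (head, tail) transitions drawn above
// are made atomic by packing both 32-bit ids into a single 64-bit word and
// issuing one 64-bit compare-and-swap (cf. KMP_PACK_64 and
// KMP_COMPARE_AND_STORE_ACQ64/REL64 below).  The packing layout and names in
// this sketch are assumptions, not the runtime's definitions.
//
#if 0
#include <atomic>
#include <cstdint>

// Assumed layout for the sketch: head in the low 32 bits, tail in the high 32.
static inline std::uint64_t example_pack(std::int32_t head, std::int32_t tail) {
    return (std::uint64_t)(std::uint32_t)head |
           ((std::uint64_t)(std::uint32_t)tail << 32);
}

// Try the "lock held, queue empty" -> "one waiter" transition
// (-1, 0) -> (gtid+1, gtid+1) from the state diagram above.
static inline bool example_try_enqueue_first(std::atomic<std::uint64_t> *head_tail,
                                             std::int32_t gtid) {
    std::uint64_t expected = example_pack(-1, 0);
    return head_tail->compare_exchange_strong(expected,
                                              example_pack(gtid + 1, gtid + 1),
                                              std::memory_order_acquire,
                                              std::memory_order_relaxed);
}
#endif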
1155 
1156 #ifdef DEBUG_QUEUING_LOCKS
1157 
1158 /* Stuff for circular trace buffer */
1159 #define TRACE_BUF_ELE 1024
1160 static char traces[TRACE_BUF_ELE][128] = { 0 };
1161 static int tc = 0;
1162 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1163 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1164 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1165 
1166 static void
1167 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1168  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1169 {
1170  kmp_int32 t, i;
1171 
1172  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1173 
1174  i = tc % TRACE_BUF_ELE;
1175  __kmp_printf_no_lock( "%s\n", traces[i] );
1176  i = (i+1) % TRACE_BUF_ELE;
1177  while ( i != (tc % TRACE_BUF_ELE) ) {
1178  __kmp_printf_no_lock( "%s", traces[i] );
1179  i = (i+1) % TRACE_BUF_ELE;
1180  }
1181  __kmp_printf_no_lock( "\n" );
1182 
1183  __kmp_printf_no_lock(
1184  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1185  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1186  head_id, tail_id );
1187 
1188  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1189 
1190  if ( lck->lk.head_id >= 1 ) {
1191  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1192  while (t > 0) {
1193  __kmp_printf_no_lock( "-> %d ", t );
1194  t = __kmp_threads[t-1]->th.th_next_waiting;
1195  }
1196  }
1197  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1198  __kmp_printf_no_lock( "\n\n" );
1199 }
1200 
1201 #endif /* DEBUG_QUEUING_LOCKS */
1202 
1203 static kmp_int32
1204 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1205 {
1206  return TCR_4( lck->lk.owner_id ) - 1;
1207 }
1208 
1209 static inline bool
1210 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1211 {
1212  return lck->lk.depth_locked != -1;
1213 }
1214 
1215 /* Acquire a lock using the queuing lock implementation */
1216 template <bool takeTime>
1217 /* [TLW] The unused template above is left behind because of what BEB believes is a
1218  potential compiler problem with __forceinline. */
1219 __forceinline static int
1220 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1221  kmp_int32 gtid )
1222 {
1223  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1224  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1225  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1226  volatile kmp_uint32 *spin_here_p;
1227  kmp_int32 need_mf = 1;
1228 
1229 #if OMPT_SUPPORT
1230  ompt_state_t prev_state = ompt_state_undefined;
1231 #endif
1232 
1233  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1234 
1235  KMP_FSYNC_PREPARE( lck );
1236  KMP_DEBUG_ASSERT( this_thr != NULL );
1237  spin_here_p = & this_thr->th.th_spin_here;
1238 
1239 #ifdef DEBUG_QUEUING_LOCKS
1240  TRACE_LOCK( gtid+1, "acq ent" );
1241  if ( *spin_here_p )
1242  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1243  if ( this_thr->th.th_next_waiting != 0 )
1244  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1245 #endif
1246  KMP_DEBUG_ASSERT( !*spin_here_p );
1247  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1248 
1249 
1250  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1251  that may follow, not just in execution order, but also in visibility order. This way,
1252  when a releasing thread observes the changes to the queue by this thread, it can
1253  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1254  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1255  to FALSE before this thread sets it to TRUE, this thread will hang.
1256  */
1257  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1258 
1259  while( 1 ) {
1260  kmp_int32 enqueued;
1261  kmp_int32 head;
1262  kmp_int32 tail;
1263 
1264  head = *head_id_p;
1265 
1266  switch ( head ) {
1267 
1268  case -1:
1269  {
1270 #ifdef DEBUG_QUEUING_LOCKS
1271  tail = *tail_id_p;
1272  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1273 #endif
1274  tail = 0; /* to make sure the next link, which is read asynchronously, is not set accidentally;
1275  this assignment prevents us from entering the if ( tail > 0 )
1276  condition in the enqueued case below, which is not necessary for
1277  this state transition */
1278 
1279  need_mf = 0;
1280  /* try (-1,0)->(tid,tid) */
1281  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1282  KMP_PACK_64( -1, 0 ),
1283  KMP_PACK_64( gtid+1, gtid+1 ) );
1284 #ifdef DEBUG_QUEUING_LOCKS
1285  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1286 #endif
1287  }
1288  break;
1289 
1290  default:
1291  {
1292  tail = *tail_id_p;
1293  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1294 
1295 #ifdef DEBUG_QUEUING_LOCKS
1296  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1297 #endif
1298 
1299  if ( tail == 0 ) {
1300  enqueued = FALSE;
1301  }
1302  else {
1303  need_mf = 0;
1304  /* try (h,t) or (h,h)->(h,tid) */
1305  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1306 
1307 #ifdef DEBUG_QUEUING_LOCKS
1308  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1309 #endif
1310  }
1311  }
1312  break;
1313 
1314  case 0: /* empty queue */
1315  {
1316  kmp_int32 grabbed_lock;
1317 
1318 #ifdef DEBUG_QUEUING_LOCKS
1319  tail = *tail_id_p;
1320  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1321 #endif
1322  /* try (0,0)->(-1,0) */
1323 
1324  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1325  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1326 
1327  if ( grabbed_lock ) {
1328 
1329  *spin_here_p = FALSE;
1330 
1331  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1332  lck, gtid ));
1333 #ifdef DEBUG_QUEUING_LOCKS
1334  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1335 #endif
1336 
1337 #if OMPT_SUPPORT
1338  if (ompt_enabled && prev_state != ompt_state_undefined) {
1339  /* change the state before clearing wait_id */
1340  this_thr->th.ompt_thread_info.state = prev_state;
1341  this_thr->th.ompt_thread_info.wait_id = 0;
1342  }
1343 #endif
1344 
1345  KMP_FSYNC_ACQUIRED( lck );
1346  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1347  }
1348  enqueued = FALSE;
1349  }
1350  break;
1351  }
1352 
1353 #if OMPT_SUPPORT
1354  if (ompt_enabled && prev_state == ompt_state_undefined) {
1355  /* this thread will spin; set wait_id before entering wait state */
1356  prev_state = this_thr->th.ompt_thread_info.state;
1357  this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1358  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1359  }
1360 #endif
1361 
1362  if ( enqueued ) {
1363  if ( tail > 0 ) {
1364  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1365  KMP_ASSERT( tail_thr != NULL );
1366  tail_thr->th.th_next_waiting = gtid+1;
1367  /* corresponding wait for this write in release code */
1368  }
1369  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1370 
1371 
1372  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1373  * throughput only here.
1374  */
1375  KMP_MB();
1376  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379  TRACE_LOCK( gtid+1, "acq spin" );
1380 
1381  if ( this_thr->th.th_next_waiting != 0 )
1382  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1383 #endif
1384  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1385  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1386  lck, gtid ));
1387 
1388 #ifdef DEBUG_QUEUING_LOCKS
1389  TRACE_LOCK( gtid+1, "acq exit 2" );
1390 #endif
1391 
1392 #if OMPT_SUPPORT
1393  /* change the state before clearing wait_id */
1394  this_thr->th.ompt_thread_info.state = prev_state;
1395  this_thr->th.ompt_thread_info.wait_id = 0;
1396 #endif
1397 
1398  /* got lock, we were dequeued by the thread that released lock */
1399  return KMP_LOCK_ACQUIRED_FIRST;
1400  }
1401 
1402  /* Yield if number of threads > number of logical processors */
1403  /* ToDo: Not sure why this should only be in oversubscription case,
1404  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1405  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1406  __kmp_xproc ) );
1407 #ifdef DEBUG_QUEUING_LOCKS
1408  TRACE_LOCK( gtid+1, "acq retry" );
1409 #endif
1410 
1411  }
1412  KMP_ASSERT2( 0, "should not get here" );
1413  return KMP_LOCK_ACQUIRED_FIRST;
1414 }
1415 
1416 int
1417 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1418 {
1419  KMP_DEBUG_ASSERT( gtid >= 0 );
1420 
1421  return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1422 }
1423 
1424 static int
1425 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1426  kmp_int32 gtid )
1427 {
1428  char const * const func = "omp_set_lock";
1429  if ( lck->lk.initialized != lck ) {
1430  KMP_FATAL( LockIsUninitialized, func );
1431  }
1432  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1433  KMP_FATAL( LockNestableUsedAsSimple, func );
1434  }
1435  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1436  KMP_FATAL( LockIsAlreadyOwned, func );
1437  }
1438 
1439  __kmp_acquire_queuing_lock( lck, gtid );
1440 
1441  lck->lk.owner_id = gtid + 1;
1442  return KMP_LOCK_ACQUIRED_FIRST;
1443 }
1444 
1445 int
1446 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1447 {
1448  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1449  kmp_int32 head;
1450 #ifdef KMP_DEBUG
1451  kmp_info_t *this_thr;
1452 #endif
1453 
1454  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1455  KMP_DEBUG_ASSERT( gtid >= 0 );
1456 #ifdef KMP_DEBUG
1457  this_thr = __kmp_thread_from_gtid( gtid );
1458  KMP_DEBUG_ASSERT( this_thr != NULL );
1459  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1460 #endif
1461 
1462  head = *head_id_p;
1463 
1464  if ( head == 0 ) { /* nobody on queue, nobody holding */
1465 
1466  /* try (0,0)->(-1,0) */
1467 
1468  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1469  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1470  KMP_FSYNC_ACQUIRED(lck);
1471  return TRUE;
1472  }
1473  }
1474 
1475  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1476  return FALSE;
1477 }
1478 
1479 static int
1480 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1481 {
1482  char const * const func = "omp_test_lock";
1483  if ( lck->lk.initialized != lck ) {
1484  KMP_FATAL( LockIsUninitialized, func );
1485  }
1486  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1487  KMP_FATAL( LockNestableUsedAsSimple, func );
1488  }
1489 
1490  int retval = __kmp_test_queuing_lock( lck, gtid );
1491 
1492  if ( retval ) {
1493  lck->lk.owner_id = gtid + 1;
1494  }
1495  return retval;
1496 }
1497 
1498 int
1499 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1500 {
1501  register kmp_info_t *this_thr;
1502  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1503  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1504 
1505  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1506  KMP_DEBUG_ASSERT( gtid >= 0 );
1507  this_thr = __kmp_thread_from_gtid( gtid );
1508  KMP_DEBUG_ASSERT( this_thr != NULL );
1509 #ifdef DEBUG_QUEUING_LOCKS
1510  TRACE_LOCK( gtid+1, "rel ent" );
1511 
1512  if ( this_thr->th.th_spin_here )
1513  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1514  if ( this_thr->th.th_next_waiting != 0 )
1515  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1516 #endif
1517  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1518  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1519 
1520  KMP_FSYNC_RELEASING(lck);
1521 
1522  while( 1 ) {
1523  kmp_int32 dequeued;
1524  kmp_int32 head;
1525  kmp_int32 tail;
1526 
1527  head = *head_id_p;
1528 
1529 #ifdef DEBUG_QUEUING_LOCKS
1530  tail = *tail_id_p;
1531  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1532  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1533 #endif
1534  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1535 
1536  if ( head == -1 ) { /* nobody on queue */
1537 
1538  /* try (-1,0)->(0,0) */
1539  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1540  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1541  lck, gtid ));
1542 #ifdef DEBUG_QUEUING_LOCKS
1543  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1544 #endif
1545 
1546 #if OMPT_SUPPORT
1547  /* nothing to do - no other thread is trying to shift blame */
1548 #endif
1549 
1550  return KMP_LOCK_RELEASED;
1551  }
1552  dequeued = FALSE;
1553 
1554  }
1555  else {
1556 
1557  tail = *tail_id_p;
1558  if ( head == tail ) { /* only one thread on the queue */
1559 
1560 #ifdef DEBUG_QUEUING_LOCKS
1561  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1562 #endif
1563  KMP_DEBUG_ASSERT( head > 0 );
1564 
1565  /* try (h,h)->(-1,0) */
1566  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1567  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1568 #ifdef DEBUG_QUEUING_LOCKS
1569  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1570 #endif
1571 
1572  }
1573  else {
1574  volatile kmp_int32 *waiting_id_p;
1575  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1576  KMP_DEBUG_ASSERT( head_thr != NULL );
1577  waiting_id_p = & head_thr->th.th_next_waiting;
1578 
1579  /* Does this require synchronous reads? */
1580 #ifdef DEBUG_QUEUING_LOCKS
1581  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1582 #endif
1583  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1584 
1585  /* try (h,t)->(h',t) or (t,t) */
1586 
1587  KMP_MB();
1588  /* make sure enqueuing thread has time to update next waiting thread field */
1589  *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1590 #ifdef DEBUG_QUEUING_LOCKS
1591  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1592 #endif
1593  dequeued = TRUE;
1594  }
1595  }
1596 
1597  if ( dequeued ) {
1598  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1599  KMP_DEBUG_ASSERT( head_thr != NULL );
1600 
1601  /* Does this require synchronous reads? */
1602 #ifdef DEBUG_QUEUING_LOCKS
1603  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1604 #endif
1605  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1606 
1607  /* For clean code only.
1608  * Thread not released until next statement prevents race with acquire code.
1609  */
1610  head_thr->th.th_next_waiting = 0;
1611 #ifdef DEBUG_QUEUING_LOCKS
1612  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1613 #endif
1614 
1615  KMP_MB();
1616  /* reset spin value */
1617  head_thr->th.th_spin_here = FALSE;
1618 
1619  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1620  lck, gtid ));
1621 #ifdef DEBUG_QUEUING_LOCKS
1622  TRACE_LOCK( gtid+1, "rel exit 2" );
1623 #endif
1624  return KMP_LOCK_RELEASED;
1625  }
1626  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1627 
1628 #ifdef DEBUG_QUEUING_LOCKS
1629  TRACE_LOCK( gtid+1, "rel retry" );
1630 #endif
1631 
1632  } /* while */
1633  KMP_ASSERT2( 0, "should not get here" );
1634  return KMP_LOCK_RELEASED;
1635 }
1636 
1637 static int
1638 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1639  kmp_int32 gtid )
1640 {
1641  char const * const func = "omp_unset_lock";
1642  KMP_MB(); /* in case another processor initialized lock */
1643  if ( lck->lk.initialized != lck ) {
1644  KMP_FATAL( LockIsUninitialized, func );
1645  }
1646  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1647  KMP_FATAL( LockNestableUsedAsSimple, func );
1648  }
1649  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1650  KMP_FATAL( LockUnsettingFree, func );
1651  }
1652  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1653  KMP_FATAL( LockUnsettingSetByAnother, func );
1654  }
1655  lck->lk.owner_id = 0;
1656  return __kmp_release_queuing_lock( lck, gtid );
1657 }
1658 
1659 void
1660 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1661 {
1662  lck->lk.location = NULL;
1663  lck->lk.head_id = 0;
1664  lck->lk.tail_id = 0;
1665  lck->lk.next_ticket = 0;
1666  lck->lk.now_serving = 0;
1667  lck->lk.owner_id = 0; // no thread owns the lock.
1668  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1669  lck->lk.initialized = lck;
1670 
1671  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1672 }
1673 
1674 static void
1675 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1676 {
1677  __kmp_init_queuing_lock( lck );
1678 }
1679 
1680 void
1681 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1682 {
1683  lck->lk.initialized = NULL;
1684  lck->lk.location = NULL;
1685  lck->lk.head_id = 0;
1686  lck->lk.tail_id = 0;
1687  lck->lk.next_ticket = 0;
1688  lck->lk.now_serving = 0;
1689  lck->lk.owner_id = 0;
1690  lck->lk.depth_locked = -1;
1691 }
1692 
1693 static void
1694 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1695 {
1696  char const * const func = "omp_destroy_lock";
1697  if ( lck->lk.initialized != lck ) {
1698  KMP_FATAL( LockIsUninitialized, func );
1699  }
1700  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1701  KMP_FATAL( LockNestableUsedAsSimple, func );
1702  }
1703  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1704  KMP_FATAL( LockStillOwned, func );
1705  }
1706  __kmp_destroy_queuing_lock( lck );
1707 }
1708 
1709 
1710 //
1711 // nested queuing locks
1712 //
1713 
1714 int
1715 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716 {
1717  KMP_DEBUG_ASSERT( gtid >= 0 );
1718 
1719  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1720  lck->lk.depth_locked += 1;
1721  return KMP_LOCK_ACQUIRED_NEXT;
1722  }
1723  else {
1724  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1725  KMP_MB();
1726  lck->lk.depth_locked = 1;
1727  KMP_MB();
1728  lck->lk.owner_id = gtid + 1;
1729  return KMP_LOCK_ACQUIRED_FIRST;
1730  }
1731 }
1732 
1733 static int
1734 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1735 {
1736  char const * const func = "omp_set_nest_lock";
1737  if ( lck->lk.initialized != lck ) {
1738  KMP_FATAL( LockIsUninitialized, func );
1739  }
1740  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1741  KMP_FATAL( LockSimpleUsedAsNestable, func );
1742  }
1743  return __kmp_acquire_nested_queuing_lock( lck, gtid );
1744 }
1745 
1746 int
1747 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1748 {
1749  int retval;
1750 
1751  KMP_DEBUG_ASSERT( gtid >= 0 );
1752 
1753  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1754  retval = ++lck->lk.depth_locked;
1755  }
1756  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1757  retval = 0;
1758  }
1759  else {
1760  KMP_MB();
1761  retval = lck->lk.depth_locked = 1;
1762  KMP_MB();
1763  lck->lk.owner_id = gtid + 1;
1764  }
1765  return retval;
1766 }
1767 
1768 static int
1769 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1770  kmp_int32 gtid )
1771 {
1772  char const * const func = "omp_test_nest_lock";
1773  if ( lck->lk.initialized != lck ) {
1774  KMP_FATAL( LockIsUninitialized, func );
1775  }
1776  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1777  KMP_FATAL( LockSimpleUsedAsNestable, func );
1778  }
1779  return __kmp_test_nested_queuing_lock( lck, gtid );
1780 }
1781 
1782 int
1783 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1784 {
1785  KMP_DEBUG_ASSERT( gtid >= 0 );
1786 
1787  KMP_MB();
1788  if ( --(lck->lk.depth_locked) == 0 ) {
1789  KMP_MB();
1790  lck->lk.owner_id = 0;
1791  __kmp_release_queuing_lock( lck, gtid );
1792  return KMP_LOCK_RELEASED;
1793  }
1794  return KMP_LOCK_STILL_HELD;
1795 }
1796 
1797 static int
1798 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1799 {
1800  char const * const func = "omp_unset_nest_lock";
1801  KMP_MB(); /* in case another processor initialized lock */
1802  if ( lck->lk.initialized != lck ) {
1803  KMP_FATAL( LockIsUninitialized, func );
1804  }
1805  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1806  KMP_FATAL( LockSimpleUsedAsNestable, func );
1807  }
1808  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1809  KMP_FATAL( LockUnsettingFree, func );
1810  }
1811  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1812  KMP_FATAL( LockUnsettingSetByAnother, func );
1813  }
1814  return __kmp_release_nested_queuing_lock( lck, gtid );
1815 }
1816 
1817 void
1818 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1819 {
1820  __kmp_init_queuing_lock( lck );
1821  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1822 }
1823 
1824 static void
1825 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1826 {
1827  __kmp_init_nested_queuing_lock( lck );
1828 }
1829 
1830 void
1831 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1832 {
1833  __kmp_destroy_queuing_lock( lck );
1834  lck->lk.depth_locked = 0;
1835 }
1836 
1837 static void
1838 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1839 {
1840  char const * const func = "omp_destroy_nest_lock";
1841  if ( lck->lk.initialized != lck ) {
1842  KMP_FATAL( LockIsUninitialized, func );
1843  }
1844  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1845  KMP_FATAL( LockSimpleUsedAsNestable, func );
1846  }
1847  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1848  KMP_FATAL( LockStillOwned, func );
1849  }
1850  __kmp_destroy_nested_queuing_lock( lck );
1851 }
1852 
1853 
1854 //
1855 // access functions to fields which don't exist for all lock kinds.
1856 //
1857 
1858 static int
1859 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1860 {
1861  return lck == lck->lk.initialized;
1862 }
1863 
1864 static const ident_t *
1865 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1866 {
1867  return lck->lk.location;
1868 }
1869 
1870 static void
1871 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1872 {
1873  lck->lk.location = loc;
1874 }
1875 
1876 static kmp_lock_flags_t
1877 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1878 {
1879  return lck->lk.flags;
1880 }
1881 
1882 static void
1883 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1884 {
1885  lck->lk.flags = flags;
1886 }
1887 
1888 #if KMP_USE_ADAPTIVE_LOCKS
1889 
1890 /*
1891  RTM Adaptive locks
1892 */
1893 
1894 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1895 
1896 #include <immintrin.h>
1897 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1898 
1899 #else
1900 
1901 // Values from the status register after failed speculation.
1902 #define _XBEGIN_STARTED (~0u)
1903 #define _XABORT_EXPLICIT (1 << 0)
1904 #define _XABORT_RETRY (1 << 1)
1905 #define _XABORT_CONFLICT (1 << 2)
1906 #define _XABORT_CAPACITY (1 << 3)
1907 #define _XABORT_DEBUG (1 << 4)
1908 #define _XABORT_NESTED (1 << 5)
1909 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1910 
1911 // Aborts for which it's worth trying again immediately
1912 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1913 
1914 #define STRINGIZE_INTERNAL(arg) #arg
1915 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1916 
1917 // Access to RTM instructions
1918 
1919 /*
1920  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1921  This is the same definition as the compiler intrinsic that will be supported at some point.
1922 */
1923 static __inline int _xbegin()
1924 {
1925  int res = -1;
1926 
1927 #if KMP_OS_WINDOWS
1928 #if KMP_ARCH_X86_64
1929  _asm {
1930  _emit 0xC7
1931  _emit 0xF8
1932  _emit 2
1933  _emit 0
1934  _emit 0
1935  _emit 0
1936  jmp L2
1937  mov res, eax
1938  L2:
1939  }
1940 #else /* IA32 */
1941  _asm {
1942  _emit 0xC7
1943  _emit 0xF8
1944  _emit 2
1945  _emit 0
1946  _emit 0
1947  _emit 0
1948  jmp L2
1949  mov res, eax
1950  L2:
1951  }
1952 #endif // KMP_ARCH_X86_64
1953 #else
1954  /* Note that %eax must be noted as killed (clobbered), because
1955  * the XSR is returned in %eax(%rax) on abort. Other register
1956  * values are restored, so don't need to be killed.
1957  *
1958  * We must also mark 'res' as an input and an output, since otherwise
1959  * 'res=-1' may be dropped as being dead, whereas we do need the
1960  * assignment on the successful (i.e., non-abort) path.
1961  */
1962  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1963  " .long 1f-1b-6\n"
1964  " jmp 2f\n"
1965  "1: movl %%eax,%0\n"
1966  "2:"
1967  :"+r"(res)::"memory","%eax");
1968 #endif // KMP_OS_WINDOWS
1969  return res;
1970 }
1971 
1972 /*
1973  Transaction end
1974 */
1975 static __inline void _xend()
1976 {
1977 #if KMP_OS_WINDOWS
1978  __asm {
1979  _emit 0x0f
1980  _emit 0x01
1981  _emit 0xd5
1982  }
1983 #else
1984  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1985 #endif
1986 }
1987 
1988 /*
1989  This is a macro; the argument must be a single byte constant which
1990  can be evaluated by the inline assembler, since it is emitted as a
1991  byte into the assembly code.
1992 */
1993 #if KMP_OS_WINDOWS
1994 #define _xabort(ARG) \
1995  _asm _emit 0xc6 \
1996  _asm _emit 0xf8 \
1997  _asm _emit ARG
1998 #else
1999 #define _xabort(ARG) \
2000  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2001 #endif
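// Usage note: the abort status argument must be a compile-time byte literal,
// as in the speculative paths below, e.g.
//
//   _xabort(0x01);   // the byte 0x01 is emitted directly into the instruction
//
// A run-time variable would not assemble, since the byte is emitted by the
// assembler rather than passed in a register.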
2002 
2003 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
2004 
2005 //
2006 // Statistics are collected for testing purposes
2007 //
2008 #if KMP_DEBUG_ADAPTIVE_LOCKS
2009 
2010 // We accumulate speculative lock statistics when the lock is destroyed.
2011 // We keep locks that haven't been destroyed in the liveLocks list
2012 // so that we can grab their statistics too.
2013 static kmp_adaptive_lock_statistics_t destroyedStats;
2014 
2015 // To hold the list of live locks.
2016 static kmp_adaptive_lock_info_t liveLocks;
2017 
2018 // A lock so we can safely update the list of locks.
2019 static kmp_bootstrap_lock_t chain_lock;
2020 
2021 // Initialize the list of stats.
2022 void
2023 __kmp_init_speculative_stats()
2024 {
2025  kmp_adaptive_lock_info_t *lck = &liveLocks;
2026 
2027  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2028  lck->stats.next = lck;
2029  lck->stats.prev = lck;
2030 
2031  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2032  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2033 
2034  __kmp_init_bootstrap_lock( &chain_lock );
2035 
2036 }
2037 
2038 // Insert the lock into the circular list
2039 static void
2040 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2041 {
2042  __kmp_acquire_bootstrap_lock( &chain_lock );
2043 
2044  lck->stats.next = liveLocks.stats.next;
2045  lck->stats.prev = &liveLocks;
2046 
2047  liveLocks.stats.next = lck;
2048  lck->stats.next->stats.prev = lck;
2049 
2050  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052 
2053  __kmp_release_bootstrap_lock( &chain_lock );
2054 }
2055 
2056 static void
2057 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2058 {
2059  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2060  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2061 
2062  kmp_adaptive_lock_info_t * n = lck->stats.next;
2063  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2064 
2065  n->stats.prev = p;
2066  p->stats.next = n;
2067 }
2068 
2069 static void
2070 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2071 {
2072  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2073  __kmp_remember_lock( lck );
2074 }
2075 
2076 static void
2077 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2078 {
2079  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2080 
2081  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2082  t->successfulSpeculations += s->successfulSpeculations;
2083  t->hardFailedSpeculations += s->hardFailedSpeculations;
2084  t->softFailedSpeculations += s->softFailedSpeculations;
2085  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2086  t->lemmingYields += s->lemmingYields;
2087 }
2088 
2089 static void
2090 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2091 {
2092  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2093 
2094  __kmp_acquire_bootstrap_lock( &chain_lock );
2095 
2096  __kmp_add_stats( &destroyedStats, lck );
2097  __kmp_forget_lock( lck );
2098 
2099  __kmp_release_bootstrap_lock( &chain_lock );
2100 }
2101 
2102 static float
2103 percent (kmp_uint32 count, kmp_uint32 total)
2104 {
2105  return (total == 0) ? 0.0: (100.0 * count)/total;
2106 }
2107 
2108 static
2109 FILE * __kmp_open_stats_file()
2110 {
2111  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2112  return stdout;
2113 
2114  size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2115  char buffer[buffLen];
2116  KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2117  (kmp_int32)getpid());
2118  FILE * result = fopen(&buffer[0], "w");
2119 
2120  // Maybe we should issue a warning here...
2121  return result ? result : stdout;
2122 }
2123 
2124 void
2125 __kmp_print_speculative_stats()
2126 {
2127  if (__kmp_user_lock_kind != lk_adaptive)
2128  return;
2129 
2130  FILE * statsFile = __kmp_open_stats_file();
2131 
2132  kmp_adaptive_lock_statistics_t total = destroyedStats;
2133  kmp_adaptive_lock_info_t *lck;
2134 
2135  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2136  __kmp_add_stats( &total, lck );
2137  }
2138  kmp_adaptive_lock_statistics_t *t = &total;
2139  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2140  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2141  t->softFailedSpeculations;
2142 
2143  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2144  fprintf ( statsFile, " Lock parameters: \n"
2145  " max_soft_retries : %10d\n"
2146  " max_badness : %10d\n",
2147  __kmp_adaptive_backoff_params.max_soft_retries,
2148  __kmp_adaptive_backoff_params.max_badness);
2149  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2150  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2151  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2152  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2153  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2154  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2155  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2156 
2157  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2158  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2159  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2160  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2161  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2162  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2163  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2164 
2165  if (statsFile != stdout)
2166  fclose( statsFile );
2167 }
2168 
2169 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2170 #else
2171 # define KMP_INC_STAT(lck,stat)
2172 
2173 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2174 
2175 static inline bool
2176 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2177 {
2178  // It is enough to check that the head_id is zero.
2179  // We do not need to check the tail as well.
2180  bool res = lck->lk.head_id == 0;
2181 
2182  // We need a fence here, since we must ensure that no memory operations
2183  // from later in this thread float above that read.
2184 #if KMP_COMPILER_ICC
2185  _mm_mfence();
2186 #else
2187  __sync_synchronize();
2188 #endif
2189 
2190  return res;
2191 }
2192 
2193 // Functions for manipulating the badness
2194 static __inline void
2195 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2196 {
2197  // Reset the badness to zero so we eagerly try to speculate again
2198  lck->lk.adaptive.badness = 0;
2199  KMP_INC_STAT(lck,successfulSpeculations);
2200 }
2201 
2202 // Create a bit mask with one more set bit.
2203 static __inline void
2204 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2205 {
2206  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2207  if ( newBadness > lck->lk.adaptive.max_badness) {
2208  return;
2209  } else {
2210  lck->lk.adaptive.badness = newBadness;
2211  }
2212 }
2213 
2214 // Check whether speculation should be attempted.
2215 static __inline int
2216 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2217 {
2218  kmp_uint32 badness = lck->lk.adaptive.badness;
2219  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2220  int res = (attempts & badness) == 0;
2221  return res;
2222 }
2223 
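// Illustrative sketch (not part of the runtime; this hypothetical helper only
// restates the test above with concrete numbers). After each failed round of
// speculation, __kmp_step_badness() shifts another set bit into the badness
// mask (0 -> 1 -> 3 -> 7 -> ...), so speculation is re-attempted only on every
// 2nd, 4th, 8th, ... non-speculative acquire attempt.
static inline int
example_badness_gate( void )
{
    kmp_uint32 badness = 3;                    // two failed rounds so far
    kmp_uint32 attempts;
    int allowed = 0;
    for ( attempts = 4; attempts <= 8; ++attempts ) {
        if ( ( attempts & badness ) == 0 )     // true only for attempts 4 and 8
            ++allowed;
    }
    return allowed;                            // == 2
}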
2224 // Attempt to acquire only the speculative lock.
2225 // Does not back off to the non-speculative lock.
2226 //
2227 static int
2228 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2229 {
2230  int retries = lck->lk.adaptive.max_soft_retries;
2231 
2232  // We don't explicitly count the start of speculation; rather, we record
2233  // the results (success, hard fail, soft fail). The sum of all of those
2234  // is the total number of times we started speculation, since every
2235  // speculation must end in one of those ways.
2236  do
2237  {
2238  kmp_uint32 status = _xbegin();
2239  // Switch this in to disable actual speculation but exercise
2240  // at least some of the rest of the code. Useful for debugging...
2241  // kmp_uint32 status = _XABORT_NESTED;
2242 
2243  if (status == _XBEGIN_STARTED )
2244  { /* We have successfully started speculation
2245  * Check that no-one acquired the lock for real between when we last looked
2246  * and now. This also gets the lock cache line into our read-set,
2247  * which we need so that we'll abort if anyone later claims it for real.
2248  */
2249  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2250  {
2251  // Lock is now visibly acquired, so someone beat us to it.
2252  // Abort the transaction so we'll restart from _xbegin with the
2253  // failure status.
2254  _xabort(0x01);
2255  KMP_ASSERT2( 0, "should not get here" );
2256  }
2257  return 1; // Lock has been acquired (speculatively)
2258  } else {
2259  // We have aborted, update the statistics
2260  if ( status & SOFT_ABORT_MASK)
2261  {
2262  KMP_INC_STAT(lck,softFailedSpeculations);
2263  // and loop round to retry.
2264  }
2265  else
2266  {
2267  KMP_INC_STAT(lck,hardFailedSpeculations);
2268  // Give up if we had a hard failure.
2269  break;
2270  }
2271  }
2272  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2273 
2274  // Either we had a hard failure or we didn't succeed softly after
2275  // the full set of attempts, so back off the badness.
2276  __kmp_step_badness( lck );
2277  return 0;
2278 }
2279 
2280 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2281 // if the speculative lock cannot be acquired.
2282 // We can succeed speculatively, non-speculatively, or fail.
2283 static int
2284 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2285 {
2286  // First try to acquire the lock speculatively
2287  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2288  return 1;
2289 
2290  // Speculative acquisition failed, so try to acquire it non-speculatively.
2291  // Count the non-speculative acquire attempt
2292  lck->lk.adaptive.acquire_attempts++;
2293 
2294  // Use base, non-speculative lock.
2295  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2296  {
2297  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2298  return 1; // Lock is acquired (non-speculatively)
2299  }
2300  else
2301  {
2302  return 0; // Failed to acquire the lock, it's already visibly locked.
2303  }
2304 }
2305 
2306 static int
2307 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2308 {
2309  char const * const func = "omp_test_lock";
2310  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2311  KMP_FATAL( LockIsUninitialized, func );
2312  }
2313 
2314  int retval = __kmp_test_adaptive_lock( lck, gtid );
2315 
2316  if ( retval ) {
2317  lck->lk.qlk.owner_id = gtid + 1;
2318  }
2319  return retval;
2320 }
2321 
2322 // Block until we can acquire a speculative, adaptive lock.
2323 // We check whether we should be trying to speculate.
2324 // If we should be, we check the real lock to see if it is free,
2325 // and, if not, pause without attempting to acquire it until it is.
2326 // Then we try the speculative acquire.
2327 // This means that although we suffer from lemmings a little (because
2328 // we can't acquire the lock speculatively until the queue of waiting
2329 // threads has cleared), we don't get into a state where we can never
2330 // acquire the lock speculatively (because we force the queue to clear
2331 // by preventing new arrivals from entering the queue).
2332 //
2333 // This does mean that when we're trying to break lemmings, the lock
2334 // is no longer fair. However OpenMP makes no guarantee that its
2335 // locks are fair, so this isn't a real problem.
2336 static void
2337 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2338 {
2339  if ( __kmp_should_speculate( lck, gtid ) )
2340  {
2341  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2342  {
2343  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2344  return;
2345  // We tried speculation and failed, so give up.
2346  }
2347  else
2348  {
2349  // We can't try speculation until the lock is free, so we
2350  // pause here (without suspending on the queuing lock)
2351  // to allow it to drain, then try again.
2352  // All other threads will also see the same result for
2353  // shouldSpeculate, so will be doing the same if they
2354  // try to claim the lock from now on.
2355  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2356  {
2357  KMP_INC_STAT(lck,lemmingYields);
2358  __kmp_yield (TRUE);
2359  }
2360 
2361  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2362  return;
2363  }
2364  }
2365 
2366  // Speculative acquisition failed, so acquire it non-speculatively.
2367  // Count the non-speculative acquire attempt
2368  lck->lk.adaptive.acquire_attempts++;
2369 
2370  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2371  // We have acquired the base lock, so count that.
2372  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2373 }
2374 
2375 static void
2376 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2377 {
2378  char const * const func = "omp_set_lock";
2379  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2380  KMP_FATAL( LockIsUninitialized, func );
2381  }
2382  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2383  KMP_FATAL( LockIsAlreadyOwned, func );
2384  }
2385 
2386  __kmp_acquire_adaptive_lock( lck, gtid );
2387 
2388  lck->lk.qlk.owner_id = gtid + 1;
2389 }
2390 
2391 static int
2392 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2393 {
2394  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2395  { // If the lock doesn't look claimed we must be speculating.
2396  // (Or the user's code is buggy and they're releasing without locking;
2397  // if we had XTEST we'd be able to check that case...)
2398  _xend(); // Exit speculation
2399  __kmp_update_badness_after_success( lck );
2400  }
2401  else
2402  { // Since the lock *is* visibly locked we're not speculating,
2403  // so should use the underlying lock's release scheme.
2404  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2405  }
2406  return KMP_LOCK_RELEASED;
2407 }
2408 
2409 static int
2410 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2411 {
2412  char const * const func = "omp_unset_lock";
2413  KMP_MB(); /* in case another processor initialized lock */
2414  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2415  KMP_FATAL( LockIsUninitialized, func );
2416  }
2417  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2418  KMP_FATAL( LockUnsettingFree, func );
2419  }
2420  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2421  KMP_FATAL( LockUnsettingSetByAnother, func );
2422  }
2423  lck->lk.qlk.owner_id = 0;
2424  __kmp_release_adaptive_lock( lck, gtid );
2425  return KMP_LOCK_RELEASED;
2426 }
2427 
2428 static void
2429 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2430 {
2431  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2432  lck->lk.adaptive.badness = 0;
2433  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2434  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2435  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2436 #if KMP_DEBUG_ADAPTIVE_LOCKS
2437  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2438 #endif
2439  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2440 }
2441 
2442 static void
2443 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2444 {
2445  __kmp_init_adaptive_lock( lck );
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2450 {
2451 #if KMP_DEBUG_ADAPTIVE_LOCKS
2452  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2453 #endif
2454  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2455  // Nothing needed for the speculative part.
2456 }
2457 
2458 static void
2459 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2460 {
2461  char const * const func = "omp_destroy_lock";
2462  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2463  KMP_FATAL( LockIsUninitialized, func );
2464  }
2465  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2466  KMP_FATAL( LockStillOwned, func );
2467  }
2468  __kmp_destroy_adaptive_lock( lck );
2469 }
2470 
2471 
2472 #endif // KMP_USE_ADAPTIVE_LOCKS
2473 
2474 
2475 /* ------------------------------------------------------------------------ */
2476 /* DRDPA ticket locks */
2477 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2478 
2479 static kmp_int32
2480 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2481 {
2482  return TCR_4( lck->lk.owner_id ) - 1;
2483 }
2484 
2485 static inline bool
2486 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2487 {
2488  return lck->lk.depth_locked != -1;
2489 }
2490 
2491 __forceinline static int
2492 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2493 {
2494  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2495  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2496  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2497  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2498  TCR_PTR(lck->lk.polls); // volatile load
2499 
2500 #ifdef USE_LOCK_PROFILE
2501  if (TCR_8(polls[ticket & mask].poll) != ticket)
2502  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2503  /* else __kmp_printf( "." );*/
2504 #endif /* USE_LOCK_PROFILE */
2505 
2506  //
2507  // Now spin-wait, but reload the polls pointer and mask, in case the
2508  // polling area has been reconfigured. Unless it is reconfigured, the
2509  // reloads stay in L1 cache and are cheap.
2510  //
2511  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2512  //
2513  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2514  // and poll to be re-read every spin iteration.
2515  //
2516  kmp_uint32 spins;
2517 
2518  KMP_FSYNC_PREPARE(lck);
2519  KMP_INIT_YIELD(spins);
2520  while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2521  // If we are oversubscribed,
2522  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2523  // CPU Pause is in the macros for yield.
2524  //
2525  KMP_YIELD(TCR_4(__kmp_nth)
2526  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2527  KMP_YIELD_SPIN(spins);
2528 
2529  // Re-read the mask and the poll pointer from the lock structure.
2530  //
2531  // Make certain that "mask" is read before "polls" !!!
2532  //
2533  // If another thread reconfigures the polling area and updates
2534  // these values, and we get the new value of mask and the old polls
2535  // pointer, we could access memory beyond the end of the old polling
2536  // area.
2537  //
2538  mask = TCR_8(lck->lk.mask); // volatile load
2539  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2540  TCR_PTR(lck->lk.polls); // volatile load
2541  }
2542 
2543  //
2544  // Critical section starts here
2545  //
2546  KMP_FSYNC_ACQUIRED(lck);
2547  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2548  ticket, lck));
2549  lck->lk.now_serving = ticket; // non-volatile store
2550 
2551  //
2552  // Deallocate a garbage polling area if we know that we are the last
2553  // thread that could possibly access it.
2554  //
2555  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2556  // ticket.
2557  //
2558  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2559  __kmp_free((void *)lck->lk.old_polls);
2560  lck->lk.old_polls = NULL;
2561  lck->lk.cleanup_ticket = 0;
2562  }
2563 
2564  //
2565  // Check to see if we should reconfigure the polling area.
2566  // If there is still a garbage polling area to be deallocated from a
2567  // previous reconfiguration, let a later thread reconfigure it.
2568  //
2569  if (lck->lk.old_polls == NULL) {
2570  bool reconfigure = false;
2571  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2572  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2573 
2574  if (TCR_4(__kmp_nth)
2575  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2576  //
2577  // We are in oversubscription mode. Contract the polling area
2578  // down to a single location, if that hasn't been done already.
2579  //
2580  if (num_polls > 1) {
2581  reconfigure = true;
2582  num_polls = TCR_4(lck->lk.num_polls);
2583  mask = 0;
2584  num_polls = 1;
2585  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2586  __kmp_allocate(num_polls * sizeof(*polls));
2587  polls[0].poll = ticket;
2588  }
2589  }
2590  else {
2591  //
2592  // We are in under/fully subscribed mode. Check the number of
2593  // threads waiting on the lock. The size of the polling area
2594  // should be at least the number of threads waiting.
2595  //
2596  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2597  if (num_waiting > num_polls) {
2598  kmp_uint32 old_num_polls = num_polls;
2599  reconfigure = true;
2600  do {
2601  mask = (mask << 1) | 1;
2602  num_polls *= 2;
2603  } while (num_polls <= num_waiting);
2604 
2605  //
2606  // Allocate the new polling area, and copy the relevant portion
2607  // of the old polling area to the new area. __kmp_allocate()
2608  // zeroes the memory it allocates, and most of the old area is
2609  // just zero padding, so we only copy the release counters.
2610  //
2611  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2612  __kmp_allocate(num_polls * sizeof(*polls));
2613  kmp_uint32 i;
2614  for (i = 0; i < old_num_polls; i++) {
2615  polls[i].poll = old_polls[i].poll;
2616  }
2617  }
2618  }
2619 
2620  if (reconfigure) {
2621  //
2622  // Now write the updated fields back to the lock structure.
2623  //
2624  // Make certain that "polls" is written before "mask" !!!
2625  //
2626  // If another thread picks up the new value of mask and the old
2627  // polls pointer, it could access memory beyond the end of the
2628  // old polling area.
2629  //
2630  // On x86, we need memory fences.
2631  //
2632  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2633  ticket, lck, num_polls));
2634 
2635  lck->lk.old_polls = old_polls; // non-volatile store
2636  lck->lk.polls = polls; // volatile store
2637 
2638  KMP_MB();
2639 
2640  lck->lk.num_polls = num_polls; // non-volatile store
2641  lck->lk.mask = mask; // volatile store
2642 
2643  KMP_MB();
2644 
2645  //
2646  // Only after the new polling area and mask have been flushed
2647  // to main memory can we update the cleanup ticket field.
2648  //
2649  // volatile load / non-volatile store
2650  //
2651  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2652  }
2653  }
2654  return KMP_LOCK_ACQUIRED_FIRST;
2655 }
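// Illustrative sketch (not part of the runtime; the hypothetical helper below
// just replays the growth loop above). Starting from a single poll location
// with five waiting threads, the polling area is doubled until it exceeds the
// number of waiters: (mask, num_polls) goes (0,1) -> (1,2) -> (3,4) -> (7,8).
static inline kmp_uint32
example_grow_polling_area( kmp_uint64 num_waiting )
{
    kmp_uint64 mask = 0;
    kmp_uint32 num_polls = 1;
    do {
        mask = ( mask << 1 ) | 1;        // widen the index mask by one bit
        num_polls *= 2;                  // and double the polling area
    } while ( num_polls <= num_waiting );
    return num_polls;                    // e.g. 8 when num_waiting == 5
}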
2656 
2657 int
2658 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2659 {
2660  return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2661 }
2662 
2663 static int
2664 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2665 {
2666  char const * const func = "omp_set_lock";
2667  if ( lck->lk.initialized != lck ) {
2668  KMP_FATAL( LockIsUninitialized, func );
2669  }
2670  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2671  KMP_FATAL( LockNestableUsedAsSimple, func );
2672  }
2673  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2674  KMP_FATAL( LockIsAlreadyOwned, func );
2675  }
2676 
2677  __kmp_acquire_drdpa_lock( lck, gtid );
2678 
2679  lck->lk.owner_id = gtid + 1;
2680  return KMP_LOCK_ACQUIRED_FIRST;
2681 }
2682 
2683 int
2684 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2685 {
2686  //
2687  // First get a ticket, then read the polls pointer and the mask.
2688  // The polls pointer must be read before the mask!!! (See above)
2689  //
2690  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2691  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2692  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2693  TCR_PTR(lck->lk.polls); // volatile load
2694  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2695  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2696  kmp_uint64 next_ticket = ticket + 1;
2697  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2698  ticket, next_ticket)) {
2699  KMP_FSYNC_ACQUIRED(lck);
2700  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2701  ticket, lck));
2702  lck->lk.now_serving = ticket; // non-volatile store
2703 
2704  //
2705  // Since no threads are waiting, there is no possibility that
2706  // we would want to reconfigure the polling area. We might
2707  // have the cleanup ticket value (which says that it is now
2708  // safe to deallocate old_polls), but we'll let a later thread
2709  // which calls __kmp_acquire_lock do that - this routine
2710  // isn't supposed to block, and we would risk blocking if we
2711  // called __kmp_free() to do the deallocation.
2712  //
2713  return TRUE;
2714  }
2715  }
2716  return FALSE;
2717 }
2718 
2719 static int
2720 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2721 {
2722  char const * const func = "omp_test_lock";
2723  if ( lck->lk.initialized != lck ) {
2724  KMP_FATAL( LockIsUninitialized, func );
2725  }
2726  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2727  KMP_FATAL( LockNestableUsedAsSimple, func );
2728  }
2729 
2730  int retval = __kmp_test_drdpa_lock( lck, gtid );
2731 
2732  if ( retval ) {
2733  lck->lk.owner_id = gtid + 1;
2734  }
2735  return retval;
2736 }
2737 
2738 int
2739 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2740 {
2741  //
2742  // Read the ticket value from the lock data struct, then the polls
2743  // pointer and the mask. The polls pointer must be read before the
2744  // mask!!! (See above)
2745  //
2746  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2747  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2748  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2749  TCR_PTR(lck->lk.polls); // volatile load
2750  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2751  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2752  ticket - 1, lck));
2753  KMP_FSYNC_RELEASING(lck);
2754  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2755  return KMP_LOCK_RELEASED;
2756 }
2757 
2758 static int
2759 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2760 {
2761  char const * const func = "omp_unset_lock";
2762  KMP_MB(); /* in case another processor initialized lock */
2763  if ( lck->lk.initialized != lck ) {
2764  KMP_FATAL( LockIsUninitialized, func );
2765  }
2766  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2767  KMP_FATAL( LockNestableUsedAsSimple, func );
2768  }
2769  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2770  KMP_FATAL( LockUnsettingFree, func );
2771  }
2772  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2773  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2774  KMP_FATAL( LockUnsettingSetByAnother, func );
2775  }
2776  lck->lk.owner_id = 0;
2777  return __kmp_release_drdpa_lock( lck, gtid );
2778 }
2779 
2780 void
2781 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2782 {
2783  lck->lk.location = NULL;
2784  lck->lk.mask = 0;
2785  lck->lk.num_polls = 1;
2786  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2787  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2788  lck->lk.cleanup_ticket = 0;
2789  lck->lk.old_polls = NULL;
2790  lck->lk.next_ticket = 0;
2791  lck->lk.now_serving = 0;
2792  lck->lk.owner_id = 0; // no thread owns the lock.
2793  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2794  lck->lk.initialized = lck;
2795 
2796  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2797 }
2798 
2799 static void
2800 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2801 {
2802  __kmp_init_drdpa_lock( lck );
2803 }
2804 
2805 void
2806 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2807 {
2808  lck->lk.initialized = NULL;
2809  lck->lk.location = NULL;
2810  if (lck->lk.polls != NULL) {
2811  __kmp_free((void *)lck->lk.polls);
2812  lck->lk.polls = NULL;
2813  }
2814  if (lck->lk.old_polls != NULL) {
2815  __kmp_free((void *)lck->lk.old_polls);
2816  lck->lk.old_polls = NULL;
2817  }
2818  lck->lk.mask = 0;
2819  lck->lk.num_polls = 0;
2820  lck->lk.cleanup_ticket = 0;
2821  lck->lk.next_ticket = 0;
2822  lck->lk.now_serving = 0;
2823  lck->lk.owner_id = 0;
2824  lck->lk.depth_locked = -1;
2825 }
2826 
2827 static void
2828 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2829 {
2830  char const * const func = "omp_destroy_lock";
2831  if ( lck->lk.initialized != lck ) {
2832  KMP_FATAL( LockIsUninitialized, func );
2833  }
2834  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2835  KMP_FATAL( LockNestableUsedAsSimple, func );
2836  }
2837  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2838  KMP_FATAL( LockStillOwned, func );
2839  }
2840  __kmp_destroy_drdpa_lock( lck );
2841 }
2842 
2843 
2844 //
2845 // nested drdpa ticket locks
2846 //
2847 
2848 int
2849 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2850 {
2851  KMP_DEBUG_ASSERT( gtid >= 0 );
2852 
2853  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2854  lck->lk.depth_locked += 1;
2855  return KMP_LOCK_ACQUIRED_NEXT;
2856  }
2857  else {
2858  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2859  KMP_MB();
2860  lck->lk.depth_locked = 1;
2861  KMP_MB();
2862  lck->lk.owner_id = gtid + 1;
2863  return KMP_LOCK_ACQUIRED_FIRST;
2864  }
2865 }
2866 
2867 static void
2868 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2869 {
2870  char const * const func = "omp_set_nest_lock";
2871  if ( lck->lk.initialized != lck ) {
2872  KMP_FATAL( LockIsUninitialized, func );
2873  }
2874  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2875  KMP_FATAL( LockSimpleUsedAsNestable, func );
2876  }
2877  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2878 }
2879 
2880 int
2881 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2882 {
2883  int retval;
2884 
2885  KMP_DEBUG_ASSERT( gtid >= 0 );
2886 
2887  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2888  retval = ++lck->lk.depth_locked;
2889  }
2890  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2891  retval = 0;
2892  }
2893  else {
2894  KMP_MB();
2895  retval = lck->lk.depth_locked = 1;
2896  KMP_MB();
2897  lck->lk.owner_id = gtid + 1;
2898  }
2899  return retval;
2900 }
2901 
2902 static int
2903 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2904 {
2905  char const * const func = "omp_test_nest_lock";
2906  if ( lck->lk.initialized != lck ) {
2907  KMP_FATAL( LockIsUninitialized, func );
2908  }
2909  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2910  KMP_FATAL( LockSimpleUsedAsNestable, func );
2911  }
2912  return __kmp_test_nested_drdpa_lock( lck, gtid );
2913 }
2914 
2915 int
2916 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2917 {
2918  KMP_DEBUG_ASSERT( gtid >= 0 );
2919 
2920  KMP_MB();
2921  if ( --(lck->lk.depth_locked) == 0 ) {
2922  KMP_MB();
2923  lck->lk.owner_id = 0;
2924  __kmp_release_drdpa_lock( lck, gtid );
2925  return KMP_LOCK_RELEASED;
2926  }
2927  return KMP_LOCK_STILL_HELD;
2928 }
2929 
2930 static int
2931 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2932 {
2933  char const * const func = "omp_unset_nest_lock";
2934  KMP_MB(); /* in case another processor initialized lock */
2935  if ( lck->lk.initialized != lck ) {
2936  KMP_FATAL( LockIsUninitialized, func );
2937  }
2938  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2939  KMP_FATAL( LockSimpleUsedAsNestable, func );
2940  }
2941  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2942  KMP_FATAL( LockUnsettingFree, func );
2943  }
2944  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2945  KMP_FATAL( LockUnsettingSetByAnother, func );
2946  }
2947  return __kmp_release_nested_drdpa_lock( lck, gtid );
2948 }
2949 
2950 void
2951 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2952 {
2953  __kmp_init_drdpa_lock( lck );
2954  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2955 }
2956 
2957 static void
2958 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2959 {
2960  __kmp_init_nested_drdpa_lock( lck );
2961 }
2962 
2963 void
2964 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2965 {
2966  __kmp_destroy_drdpa_lock( lck );
2967  lck->lk.depth_locked = 0;
2968 }
2969 
2970 static void
2971 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2972 {
2973  char const * const func = "omp_destroy_nest_lock";
2974  if ( lck->lk.initialized != lck ) {
2975  KMP_FATAL( LockIsUninitialized, func );
2976  }
2977  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2978  KMP_FATAL( LockSimpleUsedAsNestable, func );
2979  }
2980  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2981  KMP_FATAL( LockStillOwned, func );
2982  }
2983  __kmp_destroy_nested_drdpa_lock( lck );
2984 }
2985 
2986 
2987 //
2988 // access functions to fields which don't exist for all lock kinds.
2989 //
2990 
2991 static int
2992 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2993 {
2994  return lck == lck->lk.initialized;
2995 }
2996 
2997 static const ident_t *
2998 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2999 {
3000  return lck->lk.location;
3001 }
3002 
3003 static void
3004 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3005 {
3006  lck->lk.location = loc;
3007 }
3008 
3009 static kmp_lock_flags_t
3010 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3011 {
3012  return lck->lk.flags;
3013 }
3014 
3015 static void
3016 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3017 {
3018  lck->lk.flags = flags;
3019 }
3020 
3021 // Time stamp counter
3022 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3023 # define __kmp_tsc() __kmp_hardware_timestamp()
3024 // Runtime's default backoff parameters
3025 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3026 #else
3027 // Use nanoseconds for other platforms
3028 extern kmp_uint64 __kmp_now_nsec();
3029 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3030 # define __kmp_tsc() __kmp_now_nsec()
3031 #endif
3032 
3033 // A useful predicate for dealing with timestamps that may wrap.
3034 // Is a before b?
3035 // Since the timestamps may wrap, this is asking whether it's
3036 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3037 // Times for which going clockwise is the shorter distance are in the future;
3038 // the others are in the past.
3039 // E.g., with a = MAX-1 and b = MAX+1 (= 0), a > b as unsigned values, yet a
3040 // has not reached b; signed(a) = -2 and signed(b) = 0 capture the actual difference.
3041 static inline bool before(kmp_uint64 a, kmp_uint64 b)
3042 {
3043  return ((kmp_int64)b - (kmp_int64)a) > 0;
3044 }
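// Illustrative sketch (not part of the runtime; the hypothetical helper below
// just evaluates before() at the wrap boundary). With a just below the wrap
// point and b just past it, the signed difference is small and positive, so a
// is correctly reported as earlier even though a > b as unsigned values.
static inline bool
example_before_wraps( void )
{
    kmp_uint64 a = ~(kmp_uint64)0;   // 2^64 - 1, i.e. "MAX"
    kmp_uint64 b = 1;                // MAX + 2 after wrap-around
    return before( a, b );           // true: (kmp_int64)b - (kmp_int64)a == 2
}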
3045 
3046 // Truncated binary exponential backoff function
3047 void
3048 __kmp_spin_backoff(kmp_backoff_t *boff)
3049 {
3050  // We could flatten this loop, but making it a nested loop gives better results.
3051  kmp_uint32 i;
3052  for (i = boff->step; i > 0; i--) {
3053  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3054  do {
3055  KMP_CPU_PAUSE();
3056  } while (before(__kmp_tsc(), goal));
3057  }
3058  boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3059 }
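// Illustrative sketch (not part of the runtime; the hypothetical helper below
// only repeats the step update above). Starting from step == 1 with
// max_backoff == 4096, successive backoffs spin for 1, 3, 7, 15, ... rounds of
// min_tick, and the step saturates at max_backoff - 1.
static inline kmp_uint32
example_next_backoff_step( kmp_uint32 step, kmp_uint32 max_backoff )
{
    // Shift in a set bit, then mask with max_backoff - 1 (the default
    // max_backoff values, 4096 and 256, are powers of two), so the step
    // never exceeds max_backoff - 1.
    return ( step << 1 | 1 ) & ( max_backoff - 1 );
}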
3060 
3061 #if KMP_USE_DYNAMIC_LOCK
3062 
3063 // Direct lock initializer. It simply writes a tag to the low 8 bits of the lock word.
3064 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3065 {
3066  TCW_4(*lck, KMP_GET_D_TAG(seq));
3067  KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3068 }
3069 
3070 #if KMP_USE_TSX
3071 
3072 // HLE lock functions - imported from the testbed runtime.
3073 #define HLE_ACQUIRE ".byte 0xf2;"
3074 #define HLE_RELEASE ".byte 0xf3;"
3075 
3076 static inline kmp_uint32
3077 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3078 {
3079  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3080  : "+r"(v), "+m"(*p)
3081  :
3082  : "memory");
3083  return v;
3084 }
3085 
3086 static void
3087 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3088 {
3089  TCW_4(*lck, 0);
3090 }
3091 
3092 static void
3093 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3094 {
3095  // Use gtid for KMP_LOCK_BUSY if necessary
3096  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3097  int delay = 1;
3098  do {
3099  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3100  for (int i = delay; i != 0; --i)
3101  KMP_CPU_PAUSE();
3102  delay = ((delay << 1) | 1) & 7;
3103  }
3104  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3105  }
3106 }
3107 
3108 static void
3109 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3110 {
3111  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3112 }
3113 
3114 static int
3115 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3116 {
3117  __asm__ volatile(HLE_RELEASE "movl %1,%0"
3118  : "=m"(*lck)
3119  : "r"(KMP_LOCK_FREE(hle))
3120  : "memory");
3121  return KMP_LOCK_RELEASED;
3122 }
3123 
3124 static int
3125 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3126 {
3127  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3128 }
3129 
3130 static int
3131 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3132 {
3133  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3134 }
3135 
3136 static int
3137 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3138 {
3139  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3140 }
3141 
3142 static void
3143 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3144 {
3145  __kmp_init_queuing_lock(lck);
3146 }
3147 
3148 static void
3149 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3150 {
3151  __kmp_destroy_queuing_lock(lck);
3152 }
3153 
3154 static void
3155 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3156 {
3157  unsigned retries=3, status;
3158  do {
3159  status = _xbegin();
3160  if (status == _XBEGIN_STARTED) {
3161  if (__kmp_is_unlocked_queuing_lock(lck))
3162  return;
3163  _xabort(0xff);
3164  }
3165  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3166  // Wait until lock becomes free
3167  while (! __kmp_is_unlocked_queuing_lock(lck))
3168  __kmp_yield(TRUE);
3169  }
3170  else if (!(status & _XABORT_RETRY))
3171  break;
3172  } while (retries--);
3173 
3174  // Fall-back non-speculative lock (xchg)
3175  __kmp_acquire_queuing_lock(lck, gtid);
3176 }
3177 
3178 static void
3179 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180 {
3181  __kmp_acquire_rtm_lock(lck, gtid);
3182 }
3183 
3184 static int
3185 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3186 {
3187  if (__kmp_is_unlocked_queuing_lock(lck)) {
3188  // Releasing from speculation
3189  _xend();
3190  }
3191  else {
3192  // Releasing from a real lock
3193  __kmp_release_queuing_lock(lck, gtid);
3194  }
3195  return KMP_LOCK_RELEASED;
3196 }
3197 
3198 static int
3199 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3200 {
3201  return __kmp_release_rtm_lock(lck, gtid);
3202 }
3203 
3204 static int
3205 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3206 {
3207  unsigned retries=3, status;
3208  do {
3209  status = _xbegin();
3210  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3211  return 1;
3212  }
3213  if (!(status & _XABORT_RETRY))
3214  break;
3215  } while (retries--);
3216 
3217  return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3218 }
3219 
3220 static int
3221 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3222 {
3223  return __kmp_test_rtm_lock(lck, gtid);
3224 }
3225 
3226 #endif // KMP_USE_TSX
3227 
3228 // Entry functions for indirect locks (first element of direct lock jump tables).
3229 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3230 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3231 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3232 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3233 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3234 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3235 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3236 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3237 
3238 //
3239 // Jump tables for the direct lock functions.
3240 // Only fill in the odd entries; that avoids the need to shift out the low bit.
3241 //
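// Illustrative sketch (not part of the runtime; hypothetical helper). A direct
// lock word carries its tag in the low bits with bit 0 set, while an indirect
// lock word is a table index shifted left by one and therefore even, so the
// tag can index these tables directly and slot 0 is reserved for the indirect
// lock handler.
static inline int
example_is_direct_lock_word( kmp_uint32 lock_word )
{
    return ( lock_word & 1 ) != 0;   // odd -> direct tag, even -> indirect index
}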
3242 
3243 // init functions
3244 #define expand(l, op) 0,__kmp_init_direct_lock,
3245 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3246  = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3247 #undef expand
3248 
3249 // destroy functions
3250 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3251 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3252  = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3253 #undef expand
3254 
3255 // set/acquire functions
3256 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3257 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3258  = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3259 #undef expand
3260 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3261 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3262  = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3263 #undef expand
3264 
3265 // unset/release and test functions
3266 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3267 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3268  = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3269 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3270  = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3271 #undef expand
3272 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3273 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3274  = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3275 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3276  = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3277 #undef expand
3278 
3279 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3280 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3281 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3282 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
3283 
3284 //
3285 // Jump tables for the indirect lock functions.
3286 //
3287 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3288 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3289 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3290 #undef expand
3291 
3292 // set/acquire functions
3293 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3294 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3295 #undef expand
3296 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3297 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3298 #undef expand
3299 
3300 // unset/release and test functions
3301 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3302 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3303 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3304 #undef expand
3305 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3306 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3307 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3308 #undef expand
3309 
3310 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3311 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3312 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3313 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3314 
3315 // Lock index table.
3316 kmp_indirect_lock_table_t __kmp_i_lock_table;
3317 
3318 // Size of indirect locks.
3319 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3320 
3321 // Jump tables for lock accessor/modifier.
3322 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3323 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3324 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3325 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3326 
3327 // Use different lock pools for different lock types.
3328 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3329 
3330 // User lock allocator for dynamically dispatched indirect locks.
3331 // Every entry of the indirect lock table holds the address and type of the allocated indirect lock
3332 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3333 // object is returned to the reusable pool of locks, unique to each lock type.
3334 kmp_indirect_lock_t *
3335 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3336 {
3337  kmp_indirect_lock_t *lck;
3338  kmp_lock_index_t idx;
3339 
3340  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3341 
3342  if (__kmp_indirect_lock_pool[tag] != NULL) {
3343  // Reuse the allocated and destroyed lock object
3344  lck = __kmp_indirect_lock_pool[tag];
3345  if (OMP_LOCK_T_SIZE < sizeof(void *))
3346  idx = lck->lock->pool.index;
3347  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3348  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3349  } else {
3350  idx = __kmp_i_lock_table.next;
3351  // Check capacity and double the size if it is full
3352  if (idx == __kmp_i_lock_table.size) {
3353  // Double up the space for block pointers
3354  int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3355  kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3356  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3357  KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3358  __kmp_free(old_table);
3359  // Allocate new objects in the new blocks
3360  for (int i = row; i < 2*row; ++i)
3361  *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3362  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3363  __kmp_i_lock_table.size = 2*idx;
3364  }
3365  __kmp_i_lock_table.next++;
3366  lck = KMP_GET_I_LOCK(idx);
3367  // Allocate a new base lock object
3368  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3369  KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3370  }
3371 
3372  __kmp_release_lock(&__kmp_global_lock, gtid);
3373 
3374  lck->type = tag;
3375 
3376  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3377  *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3378  } else {
3379  *((kmp_indirect_lock_t **)user_lock) = lck;
3380  }
3381 
3382  return lck;
3383 }
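// Illustrative sketch (not part of the runtime; hypothetical helper, assuming
// KMP_EXTRACT_I_INDEX simply undoes the "idx << 1" encoding used above). When
// the user lock type is too small to hold a pointer, the lock word stores an
// even value encoding the table index; otherwise it stores the
// kmp_indirect_lock_t pointer itself.
static inline kmp_lock_index_t
example_decode_indirect_index( kmp_lock_index_t lock_word )
{
    return lock_word >> 1;   // recover idx from the even (idx << 1) encoding
}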
3384 
3385 // User lock lookup for dynamically dispatched locks.
3386 static __forceinline
3387 kmp_indirect_lock_t *
3388 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3389 {
3390  if (__kmp_env_consistency_check) {
3391  kmp_indirect_lock_t *lck = NULL;
3392  if (user_lock == NULL) {
3393  KMP_FATAL(LockIsUninitialized, func);
3394  }
3395  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3396  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3397  if (idx >= __kmp_i_lock_table.size) {
3398  KMP_FATAL(LockIsUninitialized, func);
3399  }
3400  lck = KMP_GET_I_LOCK(idx);
3401  } else {
3402  lck = *((kmp_indirect_lock_t **)user_lock);
3403  }
3404  if (lck == NULL) {
3405  KMP_FATAL(LockIsUninitialized, func);
3406  }
3407  return lck;
3408  } else {
3409  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3410  return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3411  } else {
3412  return *((kmp_indirect_lock_t **)user_lock);
3413  }
3414  }
3415 }
3416 
3417 static void
3418 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3419 {
3420 #if KMP_USE_ADAPTIVE_LOCKS
3421  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3422  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3423  seq = lockseq_queuing;
3424  }
3425 #endif
3426 #if KMP_USE_TSX
3427  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3428  seq = lockseq_queuing;
3429  }
3430 #endif
3431  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3432  kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3433  KMP_I_LOCK_FUNC(l, init)(l->lock);
3434  KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3435 }
3436 
3437 static void
3438 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3439 {
3440  kmp_uint32 gtid = __kmp_entry_gtid();
3441  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3442  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3443  kmp_indirect_locktag_t tag = l->type;
3444 
3445  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3446 
3447  // Use the base lock's space to keep the pool chain.
3448  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3449  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3450  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3451  }
3452  __kmp_indirect_lock_pool[tag] = l;
3453 
3454  __kmp_release_lock(&__kmp_global_lock, gtid);
3455 }
3456 
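// Forwarding wrappers without consistency checks: look up the indirect lock
// object and dispatch through the per-type function table.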
3457 static void
3458 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3459 {
3460  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3461  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3462 }
3463 
3464 static int
3465 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3466 {
3467  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3468  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3469 }
3470 
3471 static int
3472 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3473 {
3474  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3475  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3476 }
3477 
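// Checked variants used when consistency checking is enabled; they validate
// the user lock (reporting LockIsUninitialized on failure) before dispatching.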
3478 static void
3479 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3480 {
3481  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3482  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3483 }
3484 
3485 static int
3486 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3487 {
3488  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3489  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3490 }
3491 
3492 static int
3493 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3494 {
3495  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3496  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3497 }
3498 
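// Lock sequence used for OpenMP user locks when dynamic lock dispatch is
// enabled (queuing by default).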
3499 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3500 
3501 // This is used only in kmp_error.c when consistency checking is on.
3502 kmp_int32
3503 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3504 {
3505  switch (seq) {
3506  case lockseq_tas:
3507  case lockseq_nested_tas:
3508  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3509 #if KMP_USE_FUTEX
3510  case lockseq_futex:
3511  case lockseq_nested_futex:
3512  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3513 #endif
3514  case lockseq_ticket:
3515  case lockseq_nested_ticket:
3516  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3517  case lockseq_queuing:
3518  case lockseq_nested_queuing:
3519 #if KMP_USE_ADAPTIVE_LOCKS
3520  case lockseq_adaptive:
3521 #endif
3522  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3523  case lockseq_drdpa:
3524  case lockseq_nested_drdpa:
3525  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3526  default:
3527  return 0;
3528  }
3529 }
3530 
3531 // Initializes data for dynamic user locks.
3532 void
3533 __kmp_init_dynamic_user_locks()
3534 {
3535  // Initialize jump table for the lock functions
3536  if (__kmp_env_consistency_check) {
3537  __kmp_direct_set = direct_set_check;
3538  __kmp_direct_unset = direct_unset_check;
3539  __kmp_direct_test = direct_test_check;
3540  __kmp_indirect_set = indirect_set_check;
3541  __kmp_indirect_unset = indirect_unset_check;
3542  __kmp_indirect_test = indirect_test_check;
3543  }
3544  else {
3545  __kmp_direct_set = direct_set;
3546  __kmp_direct_unset = direct_unset;
3547  __kmp_direct_test = direct_test;
3548  __kmp_indirect_set = indirect_set;
3549  __kmp_indirect_unset = indirect_unset;
3550  __kmp_indirect_test = indirect_test;
3551  }
3552 
3553  // Initialize lock index table
3554  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3555  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3556  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3557  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3558  __kmp_i_lock_table.next = 0;
3559 
3560  // Indirect lock size
3561  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3562  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3563 #if KMP_USE_ADAPTIVE_LOCKS
3564  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3565 #endif
3566  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3567 #if KMP_USE_TSX
3568  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3569 #endif
3570  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3571 #if KMP_USE_FUTEX
3572  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3573 #endif
3574  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3575  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3576  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3577 
3578  // Initialize lock accessor/modifier
3579 #define fill_jumps(table, expand, sep) { \
3580  table[locktag##sep##ticket] = expand(ticket); \
3581  table[locktag##sep##queuing] = expand(queuing); \
3582  table[locktag##sep##drdpa] = expand(drdpa); \
3583 }
3584 
3585 #if KMP_USE_ADAPTIVE_LOCKS
3586 # define fill_table(table, expand) { \
3587  fill_jumps(table, expand, _); \
3588  table[locktag_adaptive] = expand(queuing); \
3589  fill_jumps(table, expand, _nested_); \
3590 }
3591 #else
3592 # define fill_table(table, expand) { \
3593  fill_jumps(table, expand, _); \
3594  fill_jumps(table, expand, _nested_); \
3595 }
3596 #endif // KMP_USE_ADAPTIVE_LOCKS
3597 
3598 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3599  fill_table(__kmp_indirect_set_location, expand);
3600 #undef expand
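// For example, with the expand() definition used just above, the fill_table()
// call assigns (for the ticket entry):
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
// and likewise for the queuing, drdpa, adaptive (mapped to queuing), and
// nested entries.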
3601 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3602  fill_table(__kmp_indirect_set_flags, expand);
3603 #undef expand
3604 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3605  fill_table(__kmp_indirect_get_location, expand);
3606 #undef expand
3607 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3608  fill_table(__kmp_indirect_get_flags, expand);
3609 #undef expand
3610 
3611  __kmp_init_user_locks = TRUE;
3612 }
3613 
3614 // Clean up the indirect lock table and lock pools.
3615 void
3616 __kmp_cleanup_indirect_user_locks()
3617 {
3618  kmp_lock_index_t i;
3619  int k;
3620 
3621  // Clean up locks in the pools first (they were already destroyed before going into the pools).
3622  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3623  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3624  while (l != NULL) {
3625  kmp_indirect_lock_t *ll = l;
3626  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3627  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3628  __kmp_free(ll->lock);
3629  ll->lock = NULL;
3630  }
3631  __kmp_indirect_lock_pool[k] = NULL;
3632  }
3633  // Clean up the remaining undestroyed locks.
3634  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3635  kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3636  if (l->lock != NULL) {
3637  // Locks not destroyed explicitly need to be destroyed here.
3638  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3639  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3640  __kmp_free(l->lock);
3641  }
3642  }
3643  // Free the table
3644  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3645  __kmp_free(__kmp_i_lock_table.table[i]);
3646  __kmp_free(__kmp_i_lock_table.table);
3647 
3648  __kmp_init_user_locks = FALSE;
3649 }
3650 
3651 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3652 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3653 
3654 #else // KMP_USE_DYNAMIC_LOCK
3655 
3656 /* ------------------------------------------------------------------------ */
3657 /* user locks
3658  *
3659  * They are implemented as a table of function pointers which are set to the
3660  * lock functions of the appropriate kind, once that has been determined.
3661  */
3662 
3663 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3664 
3665 size_t __kmp_base_user_lock_size = 0;
3666 size_t __kmp_user_lock_size = 0;
3667 
3668 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3669 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3670 
3671 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3672 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3673 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3674 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3675 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3676 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3677 
3678 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3679 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3680 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3681 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3682 
3683 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3684 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3685 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3686 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3687 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3688 
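// Bind the __kmp_*_user_lock_* function pointers above to the implementation
// selected by user_lock_kind. The TAS and futex cases leave the accessor
// pointers NULL because those lock types do not store initialization,
// location, or flags information.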
3689 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3690 {
3691  switch ( user_lock_kind ) {
3692  case lk_default:
3693  default:
3694  KMP_ASSERT( 0 );
3695 
3696  case lk_tas: {
3697  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3698  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3699 
3700  __kmp_get_user_lock_owner_ =
3701  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3702  ( &__kmp_get_tas_lock_owner );
3703 
3704  if ( __kmp_env_consistency_check ) {
3705  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3706  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3707  }
3708  else {
3709  KMP_BIND_USER_LOCK(tas);
3710  KMP_BIND_NESTED_USER_LOCK(tas);
3711  }
3712 
3713  __kmp_destroy_user_lock_ =
3714  ( void ( * )( kmp_user_lock_p ) )
3715  ( &__kmp_destroy_tas_lock );
3716 
3717  __kmp_is_user_lock_initialized_ =
3718  ( int ( * )( kmp_user_lock_p ) ) NULL;
3719 
3720  __kmp_get_user_lock_location_ =
3721  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3722 
3723  __kmp_set_user_lock_location_ =
3724  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3725 
3726  __kmp_get_user_lock_flags_ =
3727  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3728 
3729  __kmp_set_user_lock_flags_ =
3730  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3731  }
3732  break;
3733 
3734 #if KMP_USE_FUTEX
3735 
3736  case lk_futex: {
3737  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3738  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3739 
3740  __kmp_get_user_lock_owner_ =
3741  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3742  ( &__kmp_get_futex_lock_owner );
3743 
3744  if ( __kmp_env_consistency_check ) {
3745  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3746  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3747  }
3748  else {
3749  KMP_BIND_USER_LOCK(futex);
3750  KMP_BIND_NESTED_USER_LOCK(futex);
3751  }
3752 
3753  __kmp_destroy_user_lock_ =
3754  ( void ( * )( kmp_user_lock_p ) )
3755  ( &__kmp_destroy_futex_lock );
3756 
3757  __kmp_is_user_lock_initialized_ =
3758  ( int ( * )( kmp_user_lock_p ) ) NULL;
3759 
3760  __kmp_get_user_lock_location_ =
3761  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3762 
3763  __kmp_set_user_lock_location_ =
3764  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3765 
3766  __kmp_get_user_lock_flags_ =
3767  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3768 
3769  __kmp_set_user_lock_flags_ =
3770  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3771  }
3772  break;
3773 
3774 #endif // KMP_USE_FUTEX
3775 
3776  case lk_ticket: {
3777  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3778  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3779 
3780  __kmp_get_user_lock_owner_ =
3781  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3782  ( &__kmp_get_ticket_lock_owner );
3783 
3784  if ( __kmp_env_consistency_check ) {
3785  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3786  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3787  }
3788  else {
3789  KMP_BIND_USER_LOCK(ticket);
3790  KMP_BIND_NESTED_USER_LOCK(ticket);
3791  }
3792 
3793  __kmp_destroy_user_lock_ =
3794  ( void ( * )( kmp_user_lock_p ) )
3795  ( &__kmp_destroy_ticket_lock );
3796 
3797  __kmp_is_user_lock_initialized_ =
3798  ( int ( * )( kmp_user_lock_p ) )
3799  ( &__kmp_is_ticket_lock_initialized );
3800 
3801  __kmp_get_user_lock_location_ =
3802  ( const ident_t * ( * )( kmp_user_lock_p ) )
3803  ( &__kmp_get_ticket_lock_location );
3804 
3805  __kmp_set_user_lock_location_ =
3806  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3807  ( &__kmp_set_ticket_lock_location );
3808 
3809  __kmp_get_user_lock_flags_ =
3810  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3811  ( &__kmp_get_ticket_lock_flags );
3812 
3813  __kmp_set_user_lock_flags_ =
3814  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3815  ( &__kmp_set_ticket_lock_flags );
3816  }
3817  break;
3818 
3819  case lk_queuing: {
3820  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3821  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3822 
3823  __kmp_get_user_lock_owner_ =
3824  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3825  ( &__kmp_get_queuing_lock_owner );
3826 
3827  if ( __kmp_env_consistency_check ) {
3828  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3829  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3830  }
3831  else {
3832  KMP_BIND_USER_LOCK(queuing);
3833  KMP_BIND_NESTED_USER_LOCK(queuing);
3834  }
3835 
3836  __kmp_destroy_user_lock_ =
3837  ( void ( * )( kmp_user_lock_p ) )
3838  ( &__kmp_destroy_queuing_lock );
3839 
3840  __kmp_is_user_lock_initialized_ =
3841  ( int ( * )( kmp_user_lock_p ) )
3842  ( &__kmp_is_queuing_lock_initialized );
3843 
3844  __kmp_get_user_lock_location_ =
3845  ( const ident_t * ( * )( kmp_user_lock_p ) )
3846  ( &__kmp_get_queuing_lock_location );
3847 
3848  __kmp_set_user_lock_location_ =
3849  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3850  ( &__kmp_set_queuing_lock_location );
3851 
3852  __kmp_get_user_lock_flags_ =
3853  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3854  ( &__kmp_get_queuing_lock_flags );
3855 
3856  __kmp_set_user_lock_flags_ =
3857  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3858  ( &__kmp_set_queuing_lock_flags );
3859  }
3860  break;
3861 
3862 #if KMP_USE_ADAPTIVE_LOCKS
3863  case lk_adaptive: {
3864  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3865  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3866 
3867  __kmp_get_user_lock_owner_ =
3868  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3869  ( &__kmp_get_queuing_lock_owner );
3870 
3871  if ( __kmp_env_consistency_check ) {
3872  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3873  }
3874  else {
3875  KMP_BIND_USER_LOCK(adaptive);
3876  }
3877 
3878  __kmp_destroy_user_lock_ =
3879  ( void ( * )( kmp_user_lock_p ) )
3880  ( &__kmp_destroy_adaptive_lock );
3881 
3882  __kmp_is_user_lock_initialized_ =
3883  ( int ( * )( kmp_user_lock_p ) )
3884  ( &__kmp_is_queuing_lock_initialized );
3885 
3886  __kmp_get_user_lock_location_ =
3887  ( const ident_t * ( * )( kmp_user_lock_p ) )
3888  ( &__kmp_get_queuing_lock_location );
3889 
3890  __kmp_set_user_lock_location_ =
3891  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3892  ( &__kmp_set_queuing_lock_location );
3893 
3894  __kmp_get_user_lock_flags_ =
3895  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3896  ( &__kmp_get_queuing_lock_flags );
3897 
3898  __kmp_set_user_lock_flags_ =
3899  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3900  ( &__kmp_set_queuing_lock_flags );
3901 
3902  }
3903  break;
3904 #endif // KMP_USE_ADAPTIVE_LOCKS
3905 
3906  case lk_drdpa: {
3907  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3908  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3909 
3910  __kmp_get_user_lock_owner_ =
3911  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3912  ( &__kmp_get_drdpa_lock_owner );
3913 
3914  if ( __kmp_env_consistency_check ) {
3915  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3916  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3917  }
3918  else {
3919  KMP_BIND_USER_LOCK(drdpa);
3920  KMP_BIND_NESTED_USER_LOCK(drdpa);
3921  }
3922 
3923  __kmp_destroy_user_lock_ =
3924  ( void ( * )( kmp_user_lock_p ) )
3925  ( &__kmp_destroy_drdpa_lock );
3926 
3927  __kmp_is_user_lock_initialized_ =
3928  ( int ( * )( kmp_user_lock_p ) )
3929  ( &__kmp_is_drdpa_lock_initialized );
3930 
3931  __kmp_get_user_lock_location_ =
3932  ( const ident_t * ( * )( kmp_user_lock_p ) )
3933  ( &__kmp_get_drdpa_lock_location );
3934 
3935  __kmp_set_user_lock_location_ =
3936  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3937  ( &__kmp_set_drdpa_lock_location );
3938 
3939  __kmp_get_user_lock_flags_ =
3940  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3941  ( &__kmp_get_drdpa_lock_flags );
3942 
3943  __kmp_set_user_lock_flags_ =
3944  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3945  ( &__kmp_set_drdpa_lock_flags );
3946  }
3947  break;
3948  }
3949 }
3950 
3951 
3952 // ----------------------------------------------------------------------------
3953 // User lock table & lock allocation
3954 
3955 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3956 kmp_user_lock_p __kmp_lock_pool = NULL;
3957 
3958 // Lock block-allocation support.
3959 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3960 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3961 
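// Append lck to the global user lock table, growing the table geometrically
// (1024 entries initially, then doubling). Element [0] of each table chains
// to the previous, smaller table so old tables can be freed at shutdown.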
3962 static kmp_lock_index_t
3963 __kmp_lock_table_insert( kmp_user_lock_p lck )
3964 {
3965  // Assume that __kmp_global_lock is held upon entry/exit.
3966  kmp_lock_index_t index;
3967  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3968  kmp_lock_index_t size;
3969  kmp_user_lock_p *table;
3970  // Reallocate lock table.
3971  if ( __kmp_user_lock_table.allocated == 0 ) {
3972  size = 1024;
3973  }
3974  else {
3975  size = __kmp_user_lock_table.allocated * 2;
3976  }
3977  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3978  KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3979  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3980  // We cannot free the previous table now, since it may be in use by other
3981  // threads. So save the pointer to the previous table in the first element of the
3982  // new table. All the tables are chained into a list, which is freed when the
3983  // library shuts down.
3984  __kmp_user_lock_table.table = table;
3985  __kmp_user_lock_table.allocated = size;
3986  }
3987  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3988  index = __kmp_user_lock_table.used;
3989  __kmp_user_lock_table.table[ index ] = lck;
3990  ++ __kmp_user_lock_table.used;
3991  return index;
3992 }
3993 
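// Carve the next lock out of the current block, allocating a fresh block of
// __kmp_num_locks_in_block locks when the current one is exhausted. The
// kmp_block_of_locks header lives at the end of each block's buffer.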
3994 static kmp_user_lock_p
3995 __kmp_lock_block_allocate()
3996 {
3997  // Assume that __kmp_global_lock is held upon entry/exit.
3998  static int last_index = 0;
3999  if ( ( last_index >= __kmp_num_locks_in_block )
4000  || ( __kmp_lock_blocks == NULL ) ) {
4001  // Restart the index.
4002  last_index = 0;
4003  // Need to allocate a new block.
4004  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4005  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4006  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4007  // Set up the new block.
4008  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4009  new_block->next_block = __kmp_lock_blocks;
4010  new_block->locks = (void *)buffer;
4011  // Publish the new block.
4012  KMP_MB();
4013  __kmp_lock_blocks = new_block;
4014  }
4015  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4016  [ last_index * __kmp_user_lock_size ] ) );
4017  last_index++;
4018  return ret;
4019 }
4020 
4021 //
4022 // Get memory for a lock. It may be freshly allocated memory or memory reused
4023 // from the lock pool.
4024 //
4025 kmp_user_lock_p
4026 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4027  kmp_lock_flags_t flags )
4028 {
4029  kmp_user_lock_p lck;
4030  kmp_lock_index_t index;
4031  KMP_DEBUG_ASSERT( user_lock );
4032 
4033  __kmp_acquire_lock( &__kmp_global_lock, gtid );
4034 
4035  if ( __kmp_lock_pool == NULL ) {
4036  // Lock pool is empty. Allocate new memory.
4037  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4038  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4039  }
4040  else {
4041  lck = __kmp_lock_block_allocate();
4042  }
4043 
4044  // Insert the lock in the table so that it can be freed in __kmp_cleanup,
4045  // and so the debugger has info on all allocated locks.
4046  index = __kmp_lock_table_insert( lck );
4047  }
4048  else {
4049  // Pick up lock from pool.
4050  lck = __kmp_lock_pool;
4051  index = __kmp_lock_pool->pool.index;
4052  __kmp_lock_pool = __kmp_lock_pool->pool.next;
4053  }
4054 
4055  //
4056  // We could potentially differentiate between nested and regular locks
4057  // here, and do the lock table lookup for regular locks only.
4058  //
4059  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4060  * ( (kmp_lock_index_t *) user_lock ) = index;
4061  }
4062  else {
4063  * ( (kmp_user_lock_p *) user_lock ) = lck;
4064  }
4065 
4066  // Mark the lock if it is a critical section lock.
4067  __kmp_set_user_lock_flags( lck, flags );
4068 
4069  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this release earlier
4070 
4071  return lck;
4072 }
4073 
4074 // Return the lock's memory to the pool for reuse.
4075 void
4076 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4077 {
4078  KMP_DEBUG_ASSERT( user_lock != NULL );
4079  KMP_DEBUG_ASSERT( lck != NULL );
4080 
4081  __kmp_acquire_lock( & __kmp_global_lock, gtid );
4082 
4083  lck->pool.next = __kmp_lock_pool;
4084  __kmp_lock_pool = lck;
4085  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4086  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4087  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4088  lck->pool.index = index;
4089  }
4090 
4091  __kmp_release_lock( & __kmp_global_lock, gtid );
4092 }
4093 
4094 kmp_user_lock_p
4095 __kmp_lookup_user_lock( void **user_lock, char const *func )
4096 {
4097  kmp_user_lock_p lck = NULL;
4098 
4099  if ( __kmp_env_consistency_check ) {
4100  if ( user_lock == NULL ) {
4101  KMP_FATAL( LockIsUninitialized, func );
4102  }
4103  }
4104 
4105  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4106  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4107  if ( __kmp_env_consistency_check ) {
4108  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4109  KMP_FATAL( LockIsUninitialized, func );
4110  }
4111  }
4112  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4113  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4114  lck = __kmp_user_lock_table.table[index];
4115  }
4116  else {
4117  lck = *( (kmp_user_lock_p *)user_lock );
4118  }
4119 
4120  if ( __kmp_env_consistency_check ) {
4121  if ( lck == NULL ) {
4122  KMP_FATAL( LockIsUninitialized, func );
4123  }
4124  }
4125 
4126  return lck;
4127 }
4128 
4129 void
4130 __kmp_cleanup_user_locks( void )
4131 {
4132  //
4133  // Reset the lock pool. Do not worry about the locks in the pool -- we will
4134  // free them when iterating through the lock table (it includes all the locks,
4135  // dead or alive).
4136  //
4137  __kmp_lock_pool = NULL;
4138 
4139 #define IS_CRITICAL(lck) \
4140  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4141  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4142 
4143  //
4144  // Loop through lock table, free all locks.
4145  //
4146  // Do not free item [0]; it is reserved for the lock tables list.
4147  //
4148  // FIXME - we are iterating through a list of (pointers to) objects of
4149  // type union kmp_user_lock, but we have no way of knowing whether the
4150  // base type is currently "pool" or whatever the global user lock type
4151  // is.
4152  //
4153  // We are relying on the fact that for all of the user lock types
4154  // (except "tas"), the first field in the lock struct is the "initialized"
4155  // field, which is set to the address of the lock object itself when
4156  // the lock is initialized. When the union is of type "pool", the
4157  // first field is a pointer to the next object in the free list, which
4158  // will not be the same address as the object itself.
4159  //
4160  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4161  // will fail for "pool" objects on the free list. This must happen as
4162  // the "location" field of real user locks overlaps the "index" field
4163  // of "pool" objects.
4164  //
4165  // It would be better to run through the free list, and remove all "pool"
4166  // objects from the lock table before executing this loop. However,
4167  // "pool" objects do not always have their index field set (only on
4168  // lin_32e), and I don't want to search the lock table for the address
4169  // of every "pool" object on the free list.
4170  //
4171  while ( __kmp_user_lock_table.used > 1 ) {
4172  const ident *loc;
4173 
4174  //
4175  // Reduce __kmp_user_lock_table.used before freeing the lock,
4176  // so that the state of the locks stays consistent.
4177  //
4178  kmp_user_lock_p lck = __kmp_user_lock_table.table[
4179  --__kmp_user_lock_table.used ];
4180 
4181  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4182  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4183  //
4184  // Issue a warning if: KMP_CONSISTENCY_CHECK is on AND the lock is
4185  // initialized AND it is NOT a critical section (the user is not
4186  // responsible for destroying criticals) AND we know the source
4187  // location to report.
4188  //
4189  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4190  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4191  ( loc->psource != NULL ) ) {
4192  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4193  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4194  __kmp_str_loc_free( &str_loc);
4195  }
4196 
4197 #ifdef KMP_DEBUG
4198  if ( IS_CRITICAL( lck ) ) {
4199  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4200  }
4201  else {
4202  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4203  }
4204 #endif // KMP_DEBUG
4205 
4206  //
4207  // Clean up the lock's internal dynamic resources
4208  // (particularly for drdpa locks).
4209  //
4210  __kmp_destroy_user_lock( lck );
4211  }
4212 
4213  //
4214  // Free the lock if block allocation of locks is not used.
4215  //
4216  if ( __kmp_lock_blocks == NULL ) {
4217  __kmp_free( lck );
4218  }
4219  }
4220 
4221 #undef IS_CRITICAL
4222 
4223  //
4224  // delete lock table(s).
4225  //
4226  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4227  __kmp_user_lock_table.table = NULL;
4228  __kmp_user_lock_table.allocated = 0;
4229 
4230  while ( table_ptr != NULL ) {
4231  //
4232  // In the first element we saved the pointer to the previous
4233  // (smaller) lock table.
4234  //
4235  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4236  __kmp_free( table_ptr );
4237  table_ptr = next;
4238  }
4239 
4240  //
4241  // Free buffers allocated for blocks of locks.
4242  //
4243  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4244  __kmp_lock_blocks = NULL;
4245 
4246  while ( block_ptr != NULL ) {
4247  kmp_block_of_locks_t *next = block_ptr->next_block;
4248  __kmp_free( block_ptr->locks );
4249  //
4250  // *block_ptr itself was allocated at the end of the locks vector.
4251  //
4252  block_ptr = next;
4253  }
4254 
4255  TCW_4(__kmp_init_user_locks, FALSE);
4256 }
4257 
4258 #endif // KMP_USE_DYNAMIC_LOCK