LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
39 enum flag_type {
40  flag32, // 32 bit flags
41  flag64, // 64 bit flags
42  flag_oncore // special 64-bit flag for on-core barrier (hierarchical)
43 };
44 
48 template <typename P> class kmp_flag_native {
49  volatile P *loc;
50  flag_type t;
51 
52 public:
53  typedef P flag_t;
54  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
55  volatile P *get() { return loc; }
56  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
57  void set(volatile P *new_loc) { loc = new_loc; }
58  flag_type get_type() { return t; }
59  P load() { return *loc; }
60  void store(P val) { *loc = val; }
61 };
62 
66 template <typename P> class kmp_flag {
67  std::atomic<P> *loc; // Pointer to the flag storage that is modified by another thread
70  flag_type t; // "Type" of the flag in loc
71 public:
72  typedef P flag_t;
73  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
77  std::atomic<P> *get() { return loc; }
81  void *get_void_p() { return RCAST(void *, loc); }
85  void set(std::atomic<P> *new_loc) { loc = new_loc; }
89  flag_type get_type() { return t; }
93  P load() { return loc->load(std::memory_order_acquire); }
97  void store(P val) { loc->store(val, std::memory_order_release); }
98  // Derived classes must provide the following:
99  /*
100  kmp_info_t * get_waiter(kmp_uint32 i);
101  kmp_uint32 get_num_waiters();
102  bool done_check();
103  bool done_check_val(P old_loc);
104  bool notdone_check();
105  P internal_release();
106  void suspend(int th_gtid);
107  void resume(int th_gtid);
108  P set_sleeping();
109  P unset_sleeping();
110  bool is_sleeping();
111  bool is_any_sleeping();
112  bool is_sleeping_val(P old_loc);
113  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
114  int *thread_finished
115  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
116  is_constrained);
117  */
118 };
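// Illustrative sketch (not part of the runtime): a minimal derived flag that
// satisfies the equality-release portion of the contract listed above. The
// class toy_flag and its protocol are hypothetical; the real derived classes
// (kmp_basic_flag and friends) appear further down in this file.
#if 0
class toy_flag : public kmp_flag<kmp_uint32> {
  kmp_uint32 checker; // flag counts as "released" once it reaches this value
public:
  toy_flag(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_flag<kmp_uint32>(p, flag32), checker(c) {}
  bool done_check() { return this->load() == checker; }
  bool done_check_val(kmp_uint32 old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
};
#endif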
119 
120 #if OMPT_SUPPORT
121 OMPT_NOINLINE
122 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
123  ompt_state_t ompt_state,
124  ompt_data_t *tId) {
125  int ds_tid = this_thr->th.th_info.ds.ds_tid;
126  if (ompt_state == ompt_state_wait_barrier_implicit) {
127  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
128 #if OMPT_OPTIONAL
129  void *codeptr = NULL;
130  if (ompt_enabled.ompt_callback_sync_region_wait) {
131  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
132  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
133  codeptr);
134  }
135  if (ompt_enabled.ompt_callback_sync_region) {
136  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
137  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
138  codeptr);
139  }
140 #endif
141  if (!KMP_MASTER_TID(ds_tid)) {
142  if (ompt_enabled.ompt_callback_implicit_task) {
143  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
144  ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
145  }
146  // return to idle state
147  this_thr->th.ompt_thread_info.state = ompt_state_idle;
148  } else {
149  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
150  }
151  }
152 }
153 #endif
154 
155 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
156  __kmp_wait_* must make certain that another thread calls __kmp_release
157  to wake it back up to prevent deadlocks!
158 
159  NOTE: We may not belong to a team at this point. */
160 template <class C, int final_spin, bool cancellable = false,
161  bool sleepable = true>
162 static inline bool
163 __kmp_wait_template(kmp_info_t *this_thr,
164  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
165 #if USE_ITT_BUILD && USE_ITT_NOTIFY
166  volatile void *spin = flag->get();
167 #endif
168  kmp_uint32 spins;
169  int th_gtid;
170  int tasks_completed = FALSE;
171  int oversubscribed;
172 #if !KMP_USE_MONITOR
173  kmp_uint64 poll_count;
174  kmp_uint64 hibernate_goal;
175 #else
176  kmp_uint32 hibernate;
177 #endif
178 
179  KMP_FSYNC_SPIN_INIT(spin, NULL);
180  if (flag->done_check()) {
181  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
182  return false;
183  }
184  th_gtid = this_thr->th.th_info.ds.ds_gtid;
185  if (cancellable) {
186  kmp_team_t *team = this_thr->th.th_team;
187  if (team && team->t.t_cancel_request == cancel_parallel)
188  return true;
189  }
190 #if KMP_OS_UNIX
191  if (final_spin)
192  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
193 #endif
194  KA_TRACE(20,
195  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
196 #if KMP_STATS_ENABLED
197  stats_state_e thread_state = KMP_GET_THREAD_STATE();
198 #endif
199 
200 /* OMPT Behavior:
201 THIS function is called from
202  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
203  these have join / fork behavior
204 
205  In these cases, we don't change the state or trigger events in THIS
206 function.
207  Events are triggered in the calling code (__kmp_barrier):
208 
209  state := ompt_state_overhead
210  barrier-begin
211  barrier-wait-begin
212  state := ompt_state_wait_barrier
213  call join-barrier-implementation (finally arrive here)
214  {}
215  call fork-barrier-implementation (finally arrive here)
216  {}
217  state := ompt_state_overhead
218  barrier-wait-end
219  barrier-end
220  state := ompt_state_work_parallel
221 
222 
223  __kmp_fork_barrier (after thread creation, before executing implicit task)
224  call fork-barrier-implementation (finally arrive here)
225  {} // worker arrive here with state = ompt_state_idle
226 
227 
228  __kmp_join_barrier (implicit barrier at end of parallel region)
229  state := ompt_state_barrier_implicit
230  barrier-begin
231  barrier-wait-begin
232  call join-barrier-implementation (finally arrive here
233 final_spin=FALSE)
234  {
235  }
236  __kmp_fork_barrier (implicit barrier at end of parallel region)
237  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
238 
239  Worker after task-team is finished:
240  barrier-wait-end
241  barrier-end
242  implicit-task-end
243  idle-begin
244  state := ompt_state_idle
245 
246  Before leaving, if state = ompt_state_idle
247  idle-end
248  state := ompt_state_overhead
249 */
250 #if OMPT_SUPPORT
251  ompt_state_t ompt_entry_state;
252  ompt_data_t *tId;
253  if (ompt_enabled.enabled) {
254  ompt_entry_state = this_thr->th.ompt_thread_info.state;
255  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
256  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
257  ompt_lw_taskteam_t *team =
258  this_thr->th.th_team->t.ompt_serialized_team_info;
259  if (team) {
260  tId = &(team->ompt_task_info.task_data);
261  } else {
262  tId = OMPT_CUR_TASK_DATA(this_thr);
263  }
264  } else {
265  tId = &(this_thr->th.ompt_thread_info.task_data);
266  }
267  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
268  this_thr->th.th_task_team == NULL)) {
269  // implicit task is done. Either no taskqueue, or task-team finished
270  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
271  }
272  }
273 #endif
274 
275  KMP_INIT_YIELD(spins); // Setup for waiting
276 
277  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
278  __kmp_pause_status == kmp_soft_paused) {
279 #if KMP_USE_MONITOR
280 // The worker threads cannot rely on the team struct existing at this point.
281 // Use the bt values cached in the thread struct instead.
282 #ifdef KMP_ADJUST_BLOCKTIME
283  if (__kmp_pause_status == kmp_soft_paused ||
284  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
285  // Force immediate suspend if not set by user and more threads than
286  // available procs
287  hibernate = 0;
288  else
289  hibernate = this_thr->th.th_team_bt_intervals;
290 #else
291  hibernate = this_thr->th.th_team_bt_intervals;
292 #endif /* KMP_ADJUST_BLOCKTIME */
293 
294  /* If the blocktime is nonzero, we want to make sure that we spin wait for
295  the entirety of the specified #intervals, plus up to one interval more.
296  This increment makes certain that this thread doesn't go to sleep too
297  soon. */
298  if (hibernate != 0)
299  hibernate++;
300 
301  // Add in the current time value.
302  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
303  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
304  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
305  hibernate - __kmp_global.g.g_time.dt.t_value));
306 #else
307  if (__kmp_pause_status == kmp_soft_paused) {
308  // Force immediate suspend
309  hibernate_goal = KMP_NOW();
310  } else
311  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
312  poll_count = 0;
313 #endif // KMP_USE_MONITOR
314  }
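  /* Worked example of the hibernation setup above, with hypothetical numbers:
     under KMP_USE_MONITOR, th_team_bt_intervals == 3 monitor ticks and a
     current t_value of 100 give hibernate = 3 + 1 + 100 = 104, so this thread
     will not attempt to sleep before tick 104. Without the monitor, the same
     idea is expressed as an absolute deadline, hibernate_goal = KMP_NOW() plus
     the cached interval, which KMP_BLOCKING() checks in the loop below. */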
315 
316  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
317  KMP_MB();
318 
319  // Main wait spin loop
320  while (flag->notdone_check()) {
321  kmp_task_team_t *task_team = NULL;
322  if (__kmp_tasking_mode != tskm_immediate_exec) {
323  task_team = this_thr->th.th_task_team;
324  /* If the thread's task team pointer is NULL, it means one of 3 things:
325  1) A newly-created thread is first being released by
326  __kmp_fork_barrier(), and its task team has not been set up yet.
327  2) All tasks have been executed to completion.
328  3) Tasking is off for this region. This could be because we are in a
329  serialized region (perhaps the outer one), or else tasking was manually
330  disabled (KMP_TASKING=0). */
331  if (task_team != NULL) {
332  if (TCR_SYNC_4(task_team->tt.tt_active)) {
333  if (KMP_TASKING_ENABLED(task_team))
334  flag->execute_tasks(
335  this_thr, th_gtid, final_spin,
336  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
337  else
338  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
339  } else {
340  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
341 #if OMPT_SUPPORT
342  // task-team is done now, other cases should be caught above
343  if (final_spin && ompt_enabled.enabled)
344  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
345 #endif
346  this_thr->th.th_task_team = NULL;
347  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
348  }
349  } else {
350  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
351  } // if
352  } // if
353 
354  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
355  if (TCR_4(__kmp_global.g.g_done)) {
356  if (__kmp_global.g.g_abort)
357  __kmp_abort_thread();
358  break;
359  }
360 
361  // If we are oversubscribed, or have waited a bit (and
362  // KMP_LIBRARY=throughput), then yield
363  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
364 
365 #if KMP_STATS_ENABLED
366  // Check if thread has been signalled to idle state
367  // This indicates that the logical "join-barrier" has finished
368  if (this_thr->th.th_stats->isIdle() &&
369  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
370  KMP_SET_THREAD_STATE(IDLE);
371  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
372  }
373 #endif
374  // Check if the barrier surrounding this wait loop has been cancelled
375  if (cancellable) {
376  kmp_team_t *team = this_thr->th.th_team;
377  if (team && team->t.t_cancel_request == cancel_parallel)
378  break;
379  }
380 
381  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
382  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
383  __kmp_pause_status != kmp_soft_paused)
384  continue;
385 
386  // Don't suspend if there is a likelihood of new tasks being spawned.
387  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
388  continue;
389 
390 #if KMP_USE_MONITOR
391  // If we have waited a bit more, fall asleep
392  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
393  continue;
394 #else
395  if (KMP_BLOCKING(hibernate_goal, poll_count++))
396  continue;
397 #endif
398  // Don't suspend if wait loop designated non-sleepable
399  // in template parameters
400  if (!sleepable)
401  continue;
402 
403  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
404  __kmp_pause_status != kmp_soft_paused)
405  continue;
406 
407  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
408 
409 #if KMP_OS_UNIX
410  if (final_spin)
411  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
412 #endif
413  flag->suspend(th_gtid);
414 #if KMP_OS_UNIX
415  if (final_spin)
416  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
417 #endif
418 
419  if (TCR_4(__kmp_global.g.g_done)) {
420  if (__kmp_global.g.g_abort)
421  __kmp_abort_thread();
422  break;
423  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
424  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
425  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
426  }
427  // TODO: If thread is done with work and times out, disband/free
428  }
429 
430 #if OMPT_SUPPORT
431  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
432  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
433 #if OMPT_OPTIONAL
434  if (final_spin) {
435  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
436  ompt_exit_state = this_thr->th.ompt_thread_info.state;
437  }
438 #endif
439  if (ompt_exit_state == ompt_state_idle) {
440  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
441  }
442  }
443 #endif
444 #if KMP_STATS_ENABLED
445  // If we were put into idle state, pop that off the state stack
446  if (KMP_GET_THREAD_STATE() == IDLE) {
447  KMP_POP_PARTITIONED_TIMER();
448  KMP_SET_THREAD_STATE(thread_state);
449  this_thr->th.th_stats->resetIdleFlag();
450  }
451 #endif
452 
453 #if KMP_OS_UNIX
454  if (final_spin)
455  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
456 #endif
457  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
458  if (cancellable) {
459  kmp_team_t *team = this_thr->th.th_team;
460  if (team && team->t.t_cancel_request == cancel_parallel) {
461  if (tasks_completed) {
462  // undo the previous decrement of unfinished_threads so that the
463  // thread can decrement at the join barrier with no problem
464  kmp_task_team_t *task_team = this_thr->th.th_task_team;
465  std::atomic<kmp_int32> *unfinished_threads =
466  &(task_team->tt.tt_unfinished_threads);
467  KMP_ATOMIC_INC(unfinished_threads);
468  }
469  return true;
470  }
471  }
472  return false;
473 }
474 
475 /* Release any threads specified as waiting on the flag by releasing the flag
476  and resuming the waiting thread if indicated by the sleep bit(s). Any thread
477  that waits via __kmp_wait_template relies on another thread calling this
478  function to wake up the potentially sleeping thread and prevent deadlocks! */
479 template <class C> static inline void __kmp_release_template(C *flag) {
480 #ifdef KMP_DEBUG
481  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
482 #endif
483  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
484  KMP_DEBUG_ASSERT(flag->get());
485  KMP_FSYNC_RELEASING(flag->get_void_p());
486 
487  flag->internal_release();
488 
489  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
490  flag->get(), flag->load()));
491 
492  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
493  // Only need to check sleep stuff if infinite block time not set.
494  // Are *any* threads waiting on flag sleeping?
495  if (flag->is_any_sleeping()) {
496  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
497  // if sleeping waiter exists at i, sets current_waiter to i inside flag
498  kmp_info_t *waiter = flag->get_waiter(i);
499  if (waiter) {
500  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
501  // Wake up thread if needed
502  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
503  "flag(%p) set\n",
504  gtid, wait_gtid, flag->get()));
505  flag->resume(wait_gtid); // unsets flag's current_waiter when done
506  }
507  }
508  }
509  }
510 }
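// Illustrative pairing of the two templates above (hypothetical helpers; the
// real callers are the barrier and tasking code). The waiter spins inside
// __kmp_wait_template via flag.wait(); some other thread must eventually call
// release() on a flag built over the same location, or the waiter can sleep
// forever.
#if 0
// Waiting side (e.g. a worker parked on a barrier word):
static void toy_waiter(kmp_info_t *this_thr, volatile kmp_uint64 *sleep_loc,
                       void *itt_sync_obj) {
  kmp_flag_64 go_flag(sleep_loc, (kmp_uint64)4); // "released" once *sleep_loc == 4
  go_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Releasing side (another thread, later), built over the same location:
static void toy_releaser(volatile kmp_uint64 *sleep_loc) {
  kmp_flag_64 go_flag(sleep_loc, (kmp_uint64)4);
  go_flag.release(); // bumps *sleep_loc by 4 and resumes the waiter if asleep
}
#endif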
511 
512 template <typename FlagType> struct flag_traits {};
513 
514 template <> struct flag_traits<kmp_uint32> {
515  typedef kmp_uint32 flag_t;
516  static const flag_type t = flag32;
517  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
518  static inline flag_t test_then_add4(volatile flag_t *f) {
519  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
520  }
521  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
522  return KMP_TEST_THEN_OR32(f, v);
523  }
524  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
525  return KMP_TEST_THEN_AND32(f, v);
526  }
527 };
528 
529 template <> struct flag_traits<kmp_uint64> {
530  typedef kmp_uint64 flag_t;
531  static const flag_type t = flag64;
532  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
533  static inline flag_t test_then_add4(volatile flag_t *f) {
534  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
535  }
536  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
537  return KMP_TEST_THEN_OR64(f, v);
538  }
539  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
540  return KMP_TEST_THEN_AND64(f, v);
541  }
542 };
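// Illustrative use of the traits (hypothetical helper): the same templated code
// can drive either flag width without naming the concrete intrinsic.
#if 0
template <typename FlagType>
static FlagType toy_bump(volatile FlagType *f) {
  return flag_traits<FlagType>::test_then_add4(f); // ADD4_32 or ADD4_64
}
#endif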
543 
544 // Basic flag that does not use C++11 std::atomic
545 template <typename FlagType>
546 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
547  typedef flag_traits<FlagType> traits_type;
548  FlagType checker; // Value the flag is compared against to check if it has been released
550  kmp_info_t *waiting_threads[1]; // Array of threads sleeping on this flag
552  kmp_uint32 num_waiting_threads; // Number of threads sleeping on this flag
554 public:
555  kmp_basic_flag_native(volatile FlagType *p)
556  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
557  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
558  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
559  waiting_threads[0] = thr;
560  }
561  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
562  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
563  num_waiting_threads(0) {}
568  kmp_info_t *get_waiter(kmp_uint32 i) {
569  KMP_DEBUG_ASSERT(i < num_waiting_threads);
570  return waiting_threads[i];
571  }
575  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
581  void set_waiter(kmp_info_t *thr) {
582  waiting_threads[0] = thr;
583  num_waiting_threads = 1;
584  }
588  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
593  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
601  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
606  void internal_release() {
607  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
608  }
614  FlagType set_sleeping() {
615  return traits_type::test_then_or((volatile FlagType *)this->get(),
616  KMP_BARRIER_SLEEP_STATE);
617  }
623  FlagType unset_sleeping() {
624  return traits_type::test_then_and((volatile FlagType *)this->get(),
625  ~KMP_BARRIER_SLEEP_STATE);
626  }
631  bool is_sleeping_val(FlagType old_loc) {
632  return old_loc & KMP_BARRIER_SLEEP_STATE;
633  }
637  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
638  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
639  kmp_uint8 *get_stolen() { return NULL; }
640  enum barrier_type get_bt() { return bs_last_barrier; }
641 };
642 
643 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
644  typedef flag_traits<FlagType> traits_type;
645  FlagType checker; // Value the flag is compared against to check if it has been released
647  kmp_info_t *waiting_threads[1]; // Array of threads sleeping on this flag
649  kmp_uint32 num_waiting_threads; // Number of threads sleeping on this flag
651 public:
652  kmp_basic_flag(std::atomic<FlagType> *p)
653  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
654  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
655  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
656  waiting_threads[0] = thr;
657  }
658  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
659  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
660  num_waiting_threads(0) {}
665  kmp_info_t *get_waiter(kmp_uint32 i) {
666  KMP_DEBUG_ASSERT(i < num_waiting_threads);
667  return waiting_threads[i];
668  }
672  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
678  void set_waiter(kmp_info_t *thr) {
679  waiting_threads[0] = thr;
680  num_waiting_threads = 1;
681  }
685  bool done_check() { return this->load() == checker; }
690  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
698  bool notdone_check() { return this->load() != checker; }
703  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
709  FlagType set_sleeping() {
710  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
711  }
717  FlagType unset_sleeping() {
718  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
719  }
724  bool is_sleeping_val(FlagType old_loc) {
725  return old_loc & KMP_BARRIER_SLEEP_STATE;
726  }
730  bool is_sleeping() { return is_sleeping_val(this->load()); }
731  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
732  kmp_uint8 *get_stolen() { return NULL; }
733  enum barrier_type get_bt() { return bs_last_barrier; }
734 };
735 
736 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
737 public:
738  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
739  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
740  : kmp_basic_flag<kmp_uint32>(p, thr) {}
741  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
742  : kmp_basic_flag<kmp_uint32>(p, c) {}
743  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
744  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
745  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
746  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
747  kmp_int32 is_constrained) {
748  return __kmp_execute_tasks_32(
749  this_thr, gtid, this, final_spin,
750  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
751  }
752  void wait(kmp_info_t *this_thr,
753  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
754  if (final_spin)
755  __kmp_wait_template<kmp_flag_32, TRUE>(
756  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
757  else
758  __kmp_wait_template<kmp_flag_32, FALSE>(
759  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
760  }
761  void release() { __kmp_release_template(this); }
762  flag_type get_ptr_type() { return flag32; }
763 };
764 
765 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
766 public:
767  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
768  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
769  : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
770  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
771  : kmp_basic_flag_native<kmp_uint64>(p, c) {}
772  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
773  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
774  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
775  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
776  kmp_int32 is_constrained) {
777  return __kmp_execute_tasks_64(
778  this_thr, gtid, this, final_spin,
779  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
780  }
781  void wait(kmp_info_t *this_thr,
782  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
783  if (final_spin)
784  __kmp_wait_template<kmp_flag_64, TRUE>(
785  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
786  else
787  __kmp_wait_template<kmp_flag_64, FALSE>(
788  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
789  }
790  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
791  int final_spin
792  USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
793  bool retval = false;
794  if (final_spin)
795  retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
796  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
797  else
798  retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
799  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
800  return retval;
801  }
802  void release() { __kmp_release_template(this); }
803  flag_type get_ptr_type() { return flag64; }
804 };
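// Illustrative sketch of the cancellable variant (hypothetical helper; the real
// caller is the cancellation-aware barrier code): wait_cancellable_nosleep()
// returns true as soon as a cancel_parallel request is observed, so the caller
// can skip the normal barrier epilogue.
#if 0
static void toy_cancellable_wait(kmp_info_t *this_thr,
                                 volatile kmp_uint64 *bar_word, kmp_uint64 c,
                                 void *itt_sync_obj) {
  kmp_flag_64 flag(bar_word, c);
  if (flag.wait_cancellable_nosleep(this_thr,
                                    TRUE USE_ITT_BUILD_ARG(itt_sync_obj))) {
    // the enclosing parallel region was cancelled while we were waiting
  }
}
#endif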
805 
806 // Hierarchical 64-bit on-core barrier instantiation
807 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
808  kmp_uint64 checker;
809  kmp_info_t *waiting_threads[1];
810  kmp_uint32 num_waiting_threads;
811  kmp_uint32 offset; // Portion of the flag of interest for the current operation
813  bool flag_switch; // Indicates a switch in flag location
814  enum barrier_type bt; // Barrier type
815  kmp_info_t *this_thr; // Thread to redirect to a different flag location
817 #if USE_ITT_BUILD
818  void *itt_sync_obj; // ITT object to pass along to the new flag location
820 #endif
821  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
822  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
823  }
824 
825 public:
826  kmp_flag_oncore(volatile kmp_uint64 *p)
827  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
828  flag_switch(false) {}
829  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
830  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
831  offset(idx), flag_switch(false) {}
832  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
833  enum barrier_type bar_t,
834  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
835  : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
836  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
837  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
838  kmp_info_t *get_waiter(kmp_uint32 i) {
839  KMP_DEBUG_ASSERT(i < num_waiting_threads);
840  return waiting_threads[i];
841  }
842  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
843  void set_waiter(kmp_info_t *thr) {
844  waiting_threads[0] = thr;
845  num_waiting_threads = 1;
846  }
847  bool done_check_val(kmp_uint64 old_loc) {
848  return byteref(&old_loc, offset) == checker;
849  }
850  bool done_check() { return done_check_val(*get()); }
851  bool notdone_check() {
852  // Calculate flag_switch
853  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
854  flag_switch = true;
855  if (byteref(get(), offset) != 1 && !flag_switch)
856  return true;
857  else if (flag_switch) {
858  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
859  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
860  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
861  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
862  }
863  return false;
864  }
865  void internal_release() {
866  // Other threads can write their own bytes simultaneously.
867  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
868  byteref(get(), offset) = 1;
869  } else {
870  kmp_uint64 mask = 0;
871  byteref(&mask, offset) = 1;
872  KMP_TEST_THEN_OR64(get(), mask);
873  }
874  }
875  kmp_uint64 set_sleeping() {
876  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
877  }
878  kmp_uint64 unset_sleeping() {
879  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
880  }
881  bool is_sleeping_val(kmp_uint64 old_loc) {
882  return old_loc & KMP_BARRIER_SLEEP_STATE;
883  }
884  bool is_sleeping() { return is_sleeping_val(*get()); }
885  bool is_any_sleeping() { return is_sleeping_val(*get()); }
886  void wait(kmp_info_t *this_thr, int final_spin) {
887  if (final_spin)
888  __kmp_wait_template<kmp_flag_oncore, TRUE>(
889  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
890  else
891  __kmp_wait_template<kmp_flag_oncore, FALSE>(
892  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
893  }
894  void release() { __kmp_release_template(this); }
895  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
896  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
897  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
898  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
899  kmp_int32 is_constrained) {
900  return __kmp_execute_tasks_oncore(
901  this_thr, gtid, this, final_spin,
902  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
903  }
904  kmp_uint8 *get_stolen() { return NULL; }
905  enum barrier_type get_bt() { return bt; }
906  flag_type get_ptr_type() { return flag_oncore; }
907 };
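// Worked illustration of the per-byte encoding used by kmp_flag_oncore: the
// 64-bit flag word is treated as eight one-byte sub-flags, one per child of the
// on-core barrier, and byteref(loc, offset) selects the child's byte in the
// word's in-memory representation (so the bit position of the resulting mask is
// endianness-dependent). Hypothetical stand-alone version of the mask path used
// by internal_release() when blocktime is not "infinite":
#if 0
static kmp_uint64 toy_child_mask(kmp_uint32 child_offset) {
  kmp_uint64 mask = 0;
  RCAST(unsigned char *, &mask)[child_offset] = 1; // same addressing as byteref()
  return mask; // e.g. offset 2 -> 0x0000000000010000 on a little-endian target
}
#endif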
908 
909 // Used to wake up threads; the volatile void* flag is usually the th_sleep_loc
910 // associated with the given gtid.
911 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
912  if (!flag)
913  return;
914 
915  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
916  case flag32:
917  __kmp_resume_32(gtid, NULL);
918  break;
919  case flag64:
920  __kmp_resume_64(gtid, NULL);
921  break;
922  case flag_oncore:
923  __kmp_resume_oncore(gtid, NULL);
924  break;
925  }
926 }
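// Hypothetical call site for the wrapper above: wake thread gtid if it has
// parked itself on a sleep location (the th_sleep_loc mentioned in the comment
// above); a NULL location means there is nothing to resume.
#if 0
static void toy_wake(int gtid, volatile void *sleep_loc) {
  __kmp_null_resume_wrapper(gtid, sleep_loc);
}
#endif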
927 
932 #endif // KMP_WAIT_RELEASE_H