LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g., to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is a no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
63  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (it can cause
66  // library shut down).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT
74  // Normal exit process on Windows does not allow worker threads of the final
75  // parallel region to finish reporting their events, so shutting down the
76  // library here fixes the issue at least for the cases where __kmpc_end() is
77  // placed properly.
78  if (ompt_enabled.enabled)
79  __kmp_internal_end_library(__kmp_gtid_get_specific());
80 #endif
81 }
82 
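// A minimal usage sketch (assumed typical compiler-generated code, not part of
// this file): a compiler that emits these entry points brackets the user
// program with __kmpc_begin()/__kmpc_end(); `prog_loc` below is a hypothetical
// compiler-emitted source location.
#if 0
extern ident_t prog_loc;

int lowered_main(void) {
  __kmpc_begin(&prog_loc, 0); // flags are currently unused (see comment above)
  // ... user code, possibly containing parallel regions ...
  __kmpc_end(&prog_loc); // may unregister this root if KMP_IGNORE_MPPEND=0
  return 0;
}
#endif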
101 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
102  kmp_int32 gtid = __kmp_entry_gtid();
103 
104  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
105 
106  return gtid;
107 }
108 
123 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
124  KC_TRACE(10,
125  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
126 
127  return TCR_4(__kmp_all_nth);
128 }
129 
136 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
137  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138  return __kmp_tid_from_gtid(__kmp_entry_gtid());
139 }
140 
146 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
147  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
148 
149  return __kmp_entry_thread()->th.th_team->t.t_nproc;
150 }
151 
158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
159 #ifndef KMP_DEBUG
160 
161  return TRUE;
162 
163 #else
164 
165  const char *semi2;
166  const char *semi3;
167  int line_no;
168 
169  if (__kmp_par_range == 0) {
170  return TRUE;
171  }
172  semi2 = loc->psource;
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  semi2 = strchr(semi2, ';');
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2 + 1, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  if (__kmp_par_range_filename[0]) {
185  const char *name = semi2 - 1;
186  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187  name--;
188  }
189  if ((*name == '/') || (*name == ';')) {
190  name++;
191  }
192  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193  return __kmp_par_range < 0;
194  }
195  }
196  semi3 = strchr(semi2 + 1, ';');
197  if (__kmp_par_range_routine[0]) {
198  if ((semi3 != NULL) && (semi3 > semi2) &&
199  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200  return __kmp_par_range < 0;
201  }
202  }
203  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
204  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205  return __kmp_par_range > 0;
206  }
207  return __kmp_par_range < 0;
208  }
209  return TRUE;
210 
211 #endif /* KMP_DEBUG */
212 }
213 
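// Worked example (inferred from the parsing above, not normative): loc->psource
// appears to be a semicolon-separated string of the form
//   "<ident>;<file>;<routine>;<line>;<col>;;"
// e.g. ";foo.c;bar;42;1;;".  With the par-range globals set to
//   __kmp_par_range_filename = "foo.c", __kmp_par_range_routine = "bar",
//   __kmp_par_range_lb = 40, __kmp_par_range_ub = 50, and __kmp_par_range > 0
// (typically configured through the KMP_PAR_RANGE environment variable),
// the region at foo.c:bar:42 is allowed to fork, while regions whose file,
// routine, or line fall outside the filter are serialized.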
220 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221  return __kmp_entry_thread()->th.th_root->r.r_active;
222 }
223 
233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
234  kmp_int32 num_threads) {
235  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236  global_tid, num_threads));
237 
238  __kmp_push_num_threads(loc, global_tid, num_threads);
239 }
240 
241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
242  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 
244  /* the num_threads are automatically popped */
245 }
246 
247 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
248  kmp_int32 proc_bind) {
249  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
250  proc_bind));
251 
252  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
253 }
254 
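// A minimal sketch (assumed lowering, not code from this file): clauses such as
// num_threads(4) and proc_bind(close) on a parallel directive are pushed
// immediately before the fork of that region (see __kmpc_fork_call below).
#if 0
void lowered_parallel_clauses(ident_t *loc, kmp_int32 gtid) {
  __kmpc_push_num_threads(loc, gtid, 4);
  __kmpc_push_proc_bind(loc, gtid, proc_bind_close);
  // ... followed by __kmpc_fork_call(...) for the region itself ...
}
#endif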
265 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
266  int gtid = __kmp_entry_gtid();
267 
268 #if (KMP_STATS_ENABLED)
269  // If we were in a serial region, then stop the serial timer, record
270  // the event, and start parallel region timer
271  stats_state_e previous_state = KMP_GET_THREAD_STATE();
272  if (previous_state == stats_state_e::SERIAL_REGION) {
273  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
274  } else {
275  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
276  }
277  int inParallel = __kmpc_in_parallel(loc);
278  if (inParallel) {
279  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
280  } else {
281  KMP_COUNT_BLOCK(OMP_PARALLEL);
282  }
283 #endif
284 
285  // maybe saving thr_state is enough here
286  {
287  va_list ap;
288  va_start(ap, microtask);
289 
290 #if OMPT_SUPPORT
291  ompt_frame_t *ompt_frame;
292  if (ompt_enabled.enabled) {
293  kmp_info_t *master_th = __kmp_threads[gtid];
294  kmp_team_t *parent_team = master_th->th.th_team;
295  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
296  if (lwt)
297  ompt_frame = &(lwt->ompt_task_info.frame);
298  else {
299  int tid = __kmp_tid_from_gtid(gtid);
300  ompt_frame = &(
301  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
302  }
303  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
304  OMPT_STORE_RETURN_ADDRESS(gtid);
305  }
306 #endif
307 
308 #if INCLUDE_SSC_MARKS
309  SSC_MARK_FORKING();
310 #endif
311  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
312  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
313  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
314 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
315 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
316  &ap
317 #else
318  ap
319 #endif
320  );
321 #if INCLUDE_SSC_MARKS
322  SSC_MARK_JOINING();
323 #endif
324  __kmp_join_call(loc, gtid
325 #if OMPT_SUPPORT
326  ,
327  fork_context_intel
328 #endif
329  );
330 
331  va_end(ap);
332  }
333 
334 #if KMP_STATS_ENABLED
335  if (previous_state == stats_state_e::SERIAL_REGION) {
336  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
337  } else {
338  KMP_POP_PARTITIONED_TIMER();
339  }
340 #endif // KMP_STATS_ENABLED
341 }
342 
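// A minimal sketch (assumed typical compiler lowering, not code from this
// file): `#pragma omp parallel` outlines the region body into a microtask and
// dispatches it through __kmpc_fork_call(); shared variables are passed as the
// trailing varargs. `outlined` and `shared` below are hypothetical names.
#if 0
static void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *shared) {
  // parallel region body, executed by every thread of the team
  (void)shared;
}

void lowered_parallel(ident_t *loc) {
  int shared = 0;
  __kmpc_fork_call(loc, /*argc=*/1, (kmpc_micro)outlined, &shared);
}
#endif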
354 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
355  kmp_int32 num_teams, kmp_int32 num_threads) {
356  KA_TRACE(20,
357  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
358  global_tid, num_teams, num_threads));
359 
360  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
361 }
362 
373 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
374  ...) {
375  int gtid = __kmp_entry_gtid();
376  kmp_info_t *this_thr = __kmp_threads[gtid];
377  va_list ap;
378  va_start(ap, microtask);
379 
380 #if KMP_STATS_ENABLED
381  KMP_COUNT_BLOCK(OMP_TEAMS);
382  stats_state_e previous_state = KMP_GET_THREAD_STATE();
383  if (previous_state == stats_state_e::SERIAL_REGION) {
384  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
385  } else {
386  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
387  }
388 #endif
389 
390  // remember teams entry point and nesting level
391  this_thr->th.th_teams_microtask = microtask;
392  this_thr->th.th_teams_level =
393  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
394 
395 #if OMPT_SUPPORT
396  kmp_team_t *parent_team = this_thr->th.th_team;
397  int tid = __kmp_tid_from_gtid(gtid);
398  if (ompt_enabled.enabled) {
399  parent_team->t.t_implicit_task_taskdata[tid]
400  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
401  }
402  OMPT_STORE_RETURN_ADDRESS(gtid);
403 #endif
404 
405  // check if __kmpc_push_num_teams was called; set the default number of teams
406  // otherwise
407  if (this_thr->th.th_teams_size.nteams == 0) {
408  __kmp_push_num_teams(loc, gtid, 0, 0);
409  }
410  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
411  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
412  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
413 
414  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
415  VOLATILE_CAST(microtask_t)
416  __kmp_teams_master, // "wrapped" task
417  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
418 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
419  &ap
420 #else
421  ap
422 #endif
423  );
424  __kmp_join_call(loc, gtid
425 #if OMPT_SUPPORT
426  ,
427  fork_context_intel
428 #endif
429  );
430 
431  // Pop current CG root off list
432  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
433  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
434  this_thr->th.th_cg_roots = tmp->up;
435  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
436  " to node %p. cg_nthreads was %d\n",
437  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
438  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
439  int i = tmp->cg_nthreads--;
440  if (i == 1) { // check if we are the last thread in the CG (not always the case)
441  __kmp_free(tmp);
442  }
443  // Restore current task's thread_limit from CG root
444  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
445  this_thr->th.th_current_task->td_icvs.thread_limit =
446  this_thr->th.th_cg_roots->cg_thread_limit;
447 
448  this_thr->th.th_teams_microtask = NULL;
449  this_thr->th.th_teams_level = 0;
450  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
451  va_end(ap);
452 #if KMP_STATS_ENABLED
453  if (previous_state == stats_state_e::SERIAL_REGION) {
454  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
455  } else {
456  KMP_POP_PARTITIONED_TIMER();
457  }
458 #endif // KMP_STATS_ENABLED
459 }
460 
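// A minimal sketch (assumed lowering, not code from this file): `#pragma omp
// teams num_teams(4) thread_limit(8)` is typically lowered to a
// __kmpc_push_num_teams() call followed by __kmpc_fork_teams() with an
// outlined teams region; `teams_outlined` below is a hypothetical name.
#if 0
static void teams_outlined(kmp_int32 *gtid, kmp_int32 *bound_tid) {
  // body of the teams region, executed by the master thread of each team
}

void lowered_teams(ident_t *loc, kmp_int32 gtid) {
  __kmpc_push_num_teams(loc, gtid, /*num_teams=*/4, /*num_threads=*/8);
  __kmpc_fork_teams(loc, /*argc=*/0, (kmpc_micro)teams_outlined);
}
#endif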
461 // I don't think this function should ever have been exported.
462 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
463 // openmp code ever called it, but it's been exported from the RTL for so
464 // long that I'm afraid to remove the definition.
465 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
466 
479 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
480 // The implementation is now in kmp_runtime.cpp so that it can share static
481 // functions with kmp_fork_call since the tasks to be done are similar in
482 // each case.
483 #if OMPT_SUPPORT
484  OMPT_STORE_RETURN_ADDRESS(global_tid);
485 #endif
486  __kmp_serialized_parallel(loc, global_tid);
487 }
488 
496 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
497  kmp_internal_control_t *top;
498  kmp_info_t *this_thr;
499  kmp_team_t *serial_team;
500 
501  KC_TRACE(10,
502  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
503 
504  /* skip all this code for autopar serialized loops since it results in
505  unacceptable overhead */
506  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
507  return;
508 
509  // Not autopar code
510  if (!TCR_4(__kmp_init_parallel))
511  __kmp_parallel_initialize();
512 
513  __kmp_resume_if_soft_paused();
514 
515  this_thr = __kmp_threads[global_tid];
516  serial_team = this_thr->th.th_serial_team;
517 
518  kmp_task_team_t *task_team = this_thr->th.th_task_team;
519  // we need to wait for the proxy tasks before finishing the thread
520  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
521  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
522 
523  KMP_MB();
524  KMP_DEBUG_ASSERT(serial_team);
525  KMP_ASSERT(serial_team->t.t_serialized);
526  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
527  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
528  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
529  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
530 
531 #if OMPT_SUPPORT
532  if (ompt_enabled.enabled &&
533  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
534  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
535  if (ompt_enabled.ompt_callback_implicit_task) {
536  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
537  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
538  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
539  }
540 
541  // clear the task id only after unlinking the task
542  ompt_data_t *parent_task_data;
543  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
544 
545  if (ompt_enabled.ompt_callback_parallel_end) {
546  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
547  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
548  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
549  }
550  __ompt_lw_taskteam_unlink(this_thr);
551  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
552  }
553 #endif
554 
555  /* If necessary, pop the internal control stack values and replace the team
556  * values */
557  top = serial_team->t.t_control_stack_top;
558  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
559  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
560  serial_team->t.t_control_stack_top = top->next;
561  __kmp_free(top);
562  }
563 
564  // if( serial_team -> t.t_serialized > 1 )
565  serial_team->t.t_level--;
566 
567  /* pop dispatch buffers stack */
568  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
569  {
570  dispatch_private_info_t *disp_buffer =
571  serial_team->t.t_dispatch->th_disp_buffer;
572  serial_team->t.t_dispatch->th_disp_buffer =
573  serial_team->t.t_dispatch->th_disp_buffer->next;
574  __kmp_free(disp_buffer);
575  }
576  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
577 
578  --serial_team->t.t_serialized;
579  if (serial_team->t.t_serialized == 0) {
580 
581 /* return to the parallel section */
582 
583 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
584  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
585  __kmp_clear_x87_fpu_status_word();
586  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
587  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
588  }
589 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
590 
591  this_thr->th.th_team = serial_team->t.t_parent;
592  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
593 
594  /* restore values cached in the thread */
595  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
596  this_thr->th.th_team_master =
597  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
598  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
599 
600  /* TODO the below shouldn't need to be adjusted for serialized teams */
601  this_thr->th.th_dispatch =
602  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
603 
604  __kmp_pop_current_task_from_thread(this_thr);
605 
606  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
607  this_thr->th.th_current_task->td_flags.executing = 1;
608 
609  if (__kmp_tasking_mode != tskm_immediate_exec) {
610  // Copy the task team from the new child / old parent team to the thread.
611  this_thr->th.th_task_team =
612  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
613  KA_TRACE(20,
614  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
615  "team %p\n",
616  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
617  }
618  } else {
619  if (__kmp_tasking_mode != tskm_immediate_exec) {
620  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
621  "depth of serial team %p to %d\n",
622  global_tid, serial_team, serial_team->t.t_serialized));
623  }
624  }
625 
626  if (__kmp_env_consistency_check)
627  __kmp_pop_parallel(global_tid, NULL);
628 #if OMPT_SUPPORT
629  if (ompt_enabled.enabled)
630  this_thr->th.ompt_thread_info.state =
631  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
632  : ompt_state_work_parallel);
633 #endif
634 }
635 
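// A minimal sketch (assumed lowering, not code from this file): a parallel
// region whose if-clause evaluates to false is typically executed serially by
// bracketing the body with these two entry points instead of forking a team.
#if 0
void lowered_parallel_if_false(ident_t *loc, kmp_int32 gtid) {
  __kmpc_serialized_parallel(loc, gtid);
  // ... region body, executed by the encountering thread only ...
  __kmpc_end_serialized_parallel(loc, gtid);
}
#endif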
644 void __kmpc_flush(ident_t *loc) {
645  KC_TRACE(10, ("__kmpc_flush: called\n"));
646 
647  /* need an explicit __mf() here since we use volatile instead in the library */
648  KMP_MB(); /* Flush all pending memory write invalidates. */
649 
650 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
651 #if KMP_MIC
652 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
653 // We shouldn't need it, though, since the ABI rules require that
654 // * If the compiler generates NGO stores it also generates the fence
655 // * If users hand-code NGO stores they should insert the fence
656 // therefore no incomplete unordered stores should be visible.
657 #else
658  // C74404
659  // This is to address non-temporal store instructions (sfence needed).
660  // The clflush instruction is also addressed (mfence needed).
661  // Probably the non-temporal load movntdqa instruction should also be
662  // addressed.
663  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
664  if (!__kmp_cpuinfo.initialized) {
665  __kmp_query_cpuid(&__kmp_cpuinfo);
666  }
667  if (!__kmp_cpuinfo.sse2) {
668  // CPU cannot execute SSE2 instructions.
669  } else {
670 #if KMP_COMPILER_ICC
671  _mm_mfence();
672 #elif KMP_COMPILER_MSVC
673  MemoryBarrier();
674 #else
675  __sync_synchronize();
676 #endif // KMP_COMPILER_ICC
677  }
678 #endif // KMP_MIC
679 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
680 // Nothing to see here move along
681 #elif KMP_ARCH_PPC64
682 // Nothing needed here (we have a real MB above).
683 #if KMP_OS_CNK
684  // The flushing thread needs to yield here; this prevents a
685  // busy-waiting thread from saturating the pipeline. flush is
686  // often used in loops like this:
687  // while (!flag) {
688  // #pragma omp flush(flag)
689  // }
690  // and adding the yield here is good for at least a 10x speedup
691  // when running >2 threads per core (on the NAS LU benchmark).
692  __kmp_yield();
693 #endif
694 #else
695 #error Unknown or unsupported architecture
696 #endif
697 
698 #if OMPT_SUPPORT && OMPT_OPTIONAL
699  if (ompt_enabled.ompt_callback_flush) {
700  ompt_callbacks.ompt_callback(ompt_callback_flush)(
701  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
702  }
703 #endif
704 }
705 
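// A minimal sketch (assumed lowering, not code from this file): `#pragma omp
// flush` compiles to a single call to this entry point, so the spin-wait
// pattern from the PPC64 comment above calls it once per loop iteration.
#if 0
void lowered_flush_spin(ident_t *loc, volatile int *flag) {
  while (!*flag) {
    __kmpc_flush(loc); // #pragma omp flush(flag)
  }
}
#endif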
706 /* -------------------------------------------------------------------------- */
714 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
715  KMP_COUNT_BLOCK(OMP_BARRIER);
716  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
717 
718  if (!TCR_4(__kmp_init_parallel))
719  __kmp_parallel_initialize();
720 
721  __kmp_resume_if_soft_paused();
722 
723  if (__kmp_env_consistency_check) {
724  if (loc == 0) {
725  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
726  }
727  __kmp_check_barrier(global_tid, ct_barrier, loc);
728  }
729 
730 #if OMPT_SUPPORT
731  ompt_frame_t *ompt_frame;
732  if (ompt_enabled.enabled) {
733  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
734  if (ompt_frame->enter_frame.ptr == NULL)
735  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
736  OMPT_STORE_RETURN_ADDRESS(global_tid);
737  }
738 #endif
739  __kmp_threads[global_tid]->th.th_ident = loc;
740  // TODO: explicit barrier_wait_id:
741  // this function is called when 'barrier' directive is present or
742  // implicit barrier at the end of a worksharing construct.
743  // 1) better to add a per-thread barrier counter to a thread data structure
744  // 2) set to 0 when a new team is created
745  // 3) no sync is required
746 
747  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
748 #if OMPT_SUPPORT && OMPT_OPTIONAL
749  if (ompt_enabled.enabled) {
750  ompt_frame->enter_frame = ompt_data_none;
751  }
752 #endif
753 }
754 
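// A minimal sketch (assumed lowering, not code from this file): an explicit
// `#pragma omp barrier`, as well as the implicit barrier at the end of a
// worksharing construct without `nowait`, becomes a single call:
#if 0
void lowered_barrier(ident_t *loc, kmp_int32 gtid) {
  __kmpc_barrier(loc, gtid);
}
#endif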
755 /* The BARRIER for a MASTER section is always explicit */
762 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
763  int status = 0;
764 
765  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
766 
767  if (!TCR_4(__kmp_init_parallel))
768  __kmp_parallel_initialize();
769 
770  __kmp_resume_if_soft_paused();
771 
772  if (KMP_MASTER_GTID(global_tid)) {
773  KMP_COUNT_BLOCK(OMP_MASTER);
774  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
775  status = 1;
776  }
777 
778 #if OMPT_SUPPORT && OMPT_OPTIONAL
779  if (status) {
780  if (ompt_enabled.ompt_callback_master) {
781  kmp_info_t *this_thr = __kmp_threads[global_tid];
782  kmp_team_t *team = this_thr->th.th_team;
783 
784  int tid = __kmp_tid_from_gtid(global_tid);
785  ompt_callbacks.ompt_callback(ompt_callback_master)(
786  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
787  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
788  OMPT_GET_RETURN_ADDRESS(0));
789  }
790  }
791 #endif
792 
793  if (__kmp_env_consistency_check) {
794 #if KMP_USE_DYNAMIC_LOCK
795  if (status)
796  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
797  else
798  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
799 #else
800  if (status)
801  __kmp_push_sync(global_tid, ct_master, loc, NULL);
802  else
803  __kmp_check_sync(global_tid, ct_master, loc, NULL);
804 #endif
805  }
806 
807  return status;
808 }
809 
818 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
819  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
820 
821  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
822  KMP_POP_PARTITIONED_TIMER();
823 
824 #if OMPT_SUPPORT && OMPT_OPTIONAL
825  kmp_info_t *this_thr = __kmp_threads[global_tid];
826  kmp_team_t *team = this_thr->th.th_team;
827  if (ompt_enabled.ompt_callback_master) {
828  int tid = __kmp_tid_from_gtid(global_tid);
829  ompt_callbacks.ompt_callback(ompt_callback_master)(
830  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
831  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
832  OMPT_GET_RETURN_ADDRESS(0));
833  }
834 #endif
835 
836  if (__kmp_env_consistency_check) {
837  if (global_tid < 0)
838  KMP_WARNING(ThreadIdentInvalid);
839 
840  if (KMP_MASTER_GTID(global_tid))
841  __kmp_pop_sync(global_tid, ct_master, loc);
842  }
843 }
844 
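// A minimal sketch (assumed lowering, not code from this file): `#pragma omp
// master` guards the body with the return value of __kmpc_master(), and only
// the thread that entered the region calls __kmpc_end_master().
#if 0
void lowered_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    // ... master-only body ...
    __kmpc_end_master(loc, gtid);
  }
}
#endif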
852 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
853  int cid = 0;
854  kmp_info_t *th;
855  KMP_DEBUG_ASSERT(__kmp_init_serial);
856 
857  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
858 
859  if (!TCR_4(__kmp_init_parallel))
860  __kmp_parallel_initialize();
861 
862  __kmp_resume_if_soft_paused();
863 
864 #if USE_ITT_BUILD
865  __kmp_itt_ordered_prep(gtid);
866 // TODO: ordered_wait_id
867 #endif /* USE_ITT_BUILD */
868 
869  th = __kmp_threads[gtid];
870 
871 #if OMPT_SUPPORT && OMPT_OPTIONAL
872  kmp_team_t *team;
873  ompt_wait_id_t lck;
874  void *codeptr_ra;
875  if (ompt_enabled.enabled) {
876  OMPT_STORE_RETURN_ADDRESS(gtid);
877  team = __kmp_team_from_gtid(gtid);
878  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
879  /* OMPT state update */
880  th->th.ompt_thread_info.wait_id = lck;
881  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
882 
883  /* OMPT event callback */
884  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
885  if (ompt_enabled.ompt_callback_mutex_acquire) {
886  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
887  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
888  codeptr_ra);
889  }
890  }
891 #endif
892 
893  if (th->th.th_dispatch->th_deo_fcn != 0)
894  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
895  else
896  __kmp_parallel_deo(&gtid, &cid, loc);
897 
898 #if OMPT_SUPPORT && OMPT_OPTIONAL
899  if (ompt_enabled.enabled) {
900  /* OMPT state update */
901  th->th.ompt_thread_info.state = ompt_state_work_parallel;
902  th->th.ompt_thread_info.wait_id = 0;
903 
904  /* OMPT event callback */
905  if (ompt_enabled.ompt_callback_mutex_acquired) {
906  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
907  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
908  }
909  }
910 #endif
911 
912 #if USE_ITT_BUILD
913  __kmp_itt_ordered_start(gtid);
914 #endif /* USE_ITT_BUILD */
915 }
916 
924 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
925  int cid = 0;
926  kmp_info_t *th;
927 
928  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
929 
930 #if USE_ITT_BUILD
931  __kmp_itt_ordered_end(gtid);
932 // TODO: ordered_wait_id
933 #endif /* USE_ITT_BUILD */
934 
935  th = __kmp_threads[gtid];
936 
937  if (th->th.th_dispatch->th_dxo_fcn != 0)
938  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
939  else
940  __kmp_parallel_dxo(&gtid, &cid, loc);
941 
942 #if OMPT_SUPPORT && OMPT_OPTIONAL
943  OMPT_STORE_RETURN_ADDRESS(gtid);
944  if (ompt_enabled.ompt_callback_mutex_released) {
945  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
946  ompt_mutex_ordered,
947  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
948  ->t.t_ordered.dt.t_value,
949  OMPT_LOAD_RETURN_ADDRESS(gtid));
950  }
951 #endif
952 }
953 
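// A minimal sketch (assumed lowering, not code from this file): the body of an
// `#pragma omp ordered` region inside an ordered loop chunk is bracketed by
// these two entry points so iterations are released in sequence.
#if 0
void lowered_ordered_body(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid);
  // ... ordered body, executed in iteration order ...
  __kmpc_end_ordered(loc, gtid);
}
#endif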
954 #if KMP_USE_DYNAMIC_LOCK
955 
956 static __forceinline void
957 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
958  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
959  // Pointer to the allocated indirect lock is written to crit, while indexing
960  // is ignored.
961  void *idx;
962  kmp_indirect_lock_t **lck;
963  lck = (kmp_indirect_lock_t **)crit;
964  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
965  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
966  KMP_SET_I_LOCK_LOCATION(ilk, loc);
967  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
968  KA_TRACE(20,
969  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
970 #if USE_ITT_BUILD
971  __kmp_itt_critical_creating(ilk->lock, loc);
972 #endif
973  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
974  if (status == 0) {
975 #if USE_ITT_BUILD
976  __kmp_itt_critical_destroyed(ilk->lock);
977 #endif
978  // We don't really need to destroy the unclaimed lock here since it will be
979  // cleaned up at program exit.
980  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
981  }
982  KMP_DEBUG_ASSERT(*lck != NULL);
983 }
984 
985 // Fast-path acquire tas lock
986 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
987  { \
988  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
989  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
990  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
991  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
992  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
993  kmp_uint32 spins; \
994  KMP_FSYNC_PREPARE(l); \
995  KMP_INIT_YIELD(spins); \
996  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
997  do { \
998  if (TCR_4(__kmp_nth) > \
999  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1000  KMP_YIELD(TRUE); \
1001  } else { \
1002  KMP_YIELD_SPIN(spins); \
1003  } \
1004  __kmp_spin_backoff(&backoff); \
1005  } while ( \
1006  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1007  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1008  } \
1009  KMP_FSYNC_ACQUIRED(l); \
1010  }
1011 
1012 // Fast-path test tas lock
1013 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1014  { \
1015  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1016  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1017  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1018  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1019  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1020  }
1021 
1022 // Fast-path release tas lock
1023 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1024  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1025 
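// The macros above hand-inline the test-and-set fast path. A self-contained
// sketch of the same idea in portable C++ (illustration only; the runtime's
// real lock types, backoff, and yield policy differ):
#if 0
#include <atomic>

struct toy_tas_lock {
  std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1
};

inline void toy_tas_acquire(toy_tas_lock *l, int gtid) {
  int free_val = 0;
  // cheap relaxed read first, then one CAS attempt; spin only on contention
  while (l->poll.load(std::memory_order_relaxed) != 0 ||
         !l->poll.compare_exchange_strong(free_val, gtid + 1,
                                          std::memory_order_acquire)) {
    free_val = 0; // CAS overwrote the expected value on failure; reset it
  }
}

inline void toy_tas_release(toy_tas_lock *l) {
  l->poll.store(0, std::memory_order_release);
}
#endif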
1026 #if KMP_USE_FUTEX
1027 
1028 #include <sys/syscall.h>
1029 #include <unistd.h>
1030 #ifndef FUTEX_WAIT
1031 #define FUTEX_WAIT 0
1032 #endif
1033 #ifndef FUTEX_WAKE
1034 #define FUTEX_WAKE 1
1035 #endif
1036 
1037 // Fast-path acquire futex lock
1038 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1039  { \
1040  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1041  kmp_int32 gtid_code = (gtid + 1) << 1; \
1042  KMP_MB(); \
1043  KMP_FSYNC_PREPARE(ftx); \
1044  kmp_int32 poll_val; \
1045  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1046  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1047  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1048  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1049  if (!cond) { \
1050  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1051  poll_val | \
1052  KMP_LOCK_BUSY(1, futex))) { \
1053  continue; \
1054  } \
1055  poll_val |= KMP_LOCK_BUSY(1, futex); \
1056  } \
1057  kmp_int32 rc; \
1058  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1059  NULL, NULL, 0)) != 0) { \
1060  continue; \
1061  } \
1062  gtid_code |= 1; \
1063  } \
1064  KMP_FSYNC_ACQUIRED(ftx); \
1065  }
1066 
1067 // Fast-path test futex lock
1068 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1069  { \
1070  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1071  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1072  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1073  KMP_FSYNC_ACQUIRED(ftx); \
1074  rc = TRUE; \
1075  } else { \
1076  rc = FALSE; \
1077  } \
1078  }
1079 
1080 // Fast-path release futex lock
1081 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1082  { \
1083  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1084  KMP_MB(); \
1085  KMP_FSYNC_RELEASING(ftx); \
1086  kmp_int32 poll_val = \
1087  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1088  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1089  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1090  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1091  } \
1092  KMP_MB(); \
1093  KMP_YIELD_OVERSUB(); \
1094  }
1095 
1096 #endif // KMP_USE_FUTEX
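// How the futex fast path above encodes state (summarized from the macros, as
// an aid to reading them): lk.poll holds KMP_LOCK_FREE(futex) when the lock is
// free, otherwise (gtid + 1) << 1 for the owner, with the low bit set once any
// thread has gone to sleep in FUTEX_WAIT. The acquire path first tries a plain
// CAS; on failure it sets that "waiters" bit before sleeping, so the release
// path, which exchanges the word back to free, issues a FUTEX_WAKE only when
// the bit was set.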
1097 
1098 #else // KMP_USE_DYNAMIC_LOCK
1099 
1100 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1101  ident_t const *loc,
1102  kmp_int32 gtid) {
1103  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1104 
1105  // Because of the double-check, the following load doesn't need to be volatile
1106  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1107 
1108  if (lck == NULL) {
1109  void *idx;
1110 
1111  // Allocate & initialize the lock.
1112  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1113  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1114  __kmp_init_user_lock_with_checks(lck);
1115  __kmp_set_user_lock_location(lck, loc);
1116 #if USE_ITT_BUILD
1117  __kmp_itt_critical_creating(lck);
1118 // __kmp_itt_critical_creating() should be called *before* the first usage
1119 // of the underlying lock. It is the only place where we can guarantee it. There
1120 // is a chance the lock will be destroyed without ever being used, but that is
1121 // not a problem, because this is not a real event seen by the user but rather a
1122 // way of setting a name for the object (lock). See kmp_itt.h for more details.
1123 #endif /* USE_ITT_BUILD */
1124 
1125  // Use a cmpxchg instruction to slam the start of the critical section with
1126  // the lock pointer. If another thread beat us to it, deallocate the lock,
1127  // and use the lock that the other thread allocated.
1128  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1129 
1130  if (status == 0) {
1131 // Deallocate the lock and reload the value.
1132 #if USE_ITT_BUILD
1133  __kmp_itt_critical_destroyed(lck);
1134 // Let ITT know the lock is destroyed and the same memory location may be reused
1135 // for another purpose.
1136 #endif /* USE_ITT_BUILD */
1137  __kmp_destroy_user_lock_with_checks(lck);
1138  __kmp_user_lock_free(&idx, gtid, lck);
1139  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1140  KMP_DEBUG_ASSERT(lck != NULL);
1141  }
1142  }
1143  return lck;
1144 }
1145 
1146 #endif // KMP_USE_DYNAMIC_LOCK
1147 
1158 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1159  kmp_critical_name *crit) {
1160 #if KMP_USE_DYNAMIC_LOCK
1161 #if OMPT_SUPPORT && OMPT_OPTIONAL
1162  OMPT_STORE_RETURN_ADDRESS(global_tid);
1163 #endif // OMPT_SUPPORT
1164  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1165 #else
1166  KMP_COUNT_BLOCK(OMP_CRITICAL);
1167 #if OMPT_SUPPORT && OMPT_OPTIONAL
1168  ompt_state_t prev_state = ompt_state_undefined;
1169  ompt_thread_info_t ti;
1170 #endif
1171  kmp_user_lock_p lck;
1172 
1173  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1174 
1175  // TODO: add THR_OVHD_STATE
1176 
1177  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1178  KMP_CHECK_USER_LOCK_INIT();
1179 
1180  if ((__kmp_user_lock_kind == lk_tas) &&
1181  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1182  lck = (kmp_user_lock_p)crit;
1183  }
1184 #if KMP_USE_FUTEX
1185  else if ((__kmp_user_lock_kind == lk_futex) &&
1186  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1187  lck = (kmp_user_lock_p)crit;
1188  }
1189 #endif
1190  else { // ticket, queuing or drdpa
1191  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1192  }
1193 
1194  if (__kmp_env_consistency_check)
1195  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1196 
1197 // Since the critical directive binds to all threads, not just the current
1198 // team, we have to check this even if we are in a serialized team.
1199 // Also, even if we are the uber thread, we still have to acquire the lock,
1200 // as we may have to contend with sibling threads.
1201 
1202 #if USE_ITT_BUILD
1203  __kmp_itt_critical_acquiring(lck);
1204 #endif /* USE_ITT_BUILD */
1205 #if OMPT_SUPPORT && OMPT_OPTIONAL
1206  OMPT_STORE_RETURN_ADDRESS(global_tid);
1207  void *codeptr_ra = NULL;
1208  if (ompt_enabled.enabled) {
1209  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1210  /* OMPT state update */
1211  prev_state = ti.state;
1212  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1213  ti.state = ompt_state_wait_critical;
1214 
1215  /* OMPT event callback */
1216  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1217  if (ompt_enabled.ompt_callback_mutex_acquire) {
1218  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1219  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1220  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1221  }
1222  }
1223 #endif
1224  // Value of 'crit' should be good for using as a critical_id of the critical
1225  // section directive.
1226  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1227 
1228 #if USE_ITT_BUILD
1229  __kmp_itt_critical_acquired(lck);
1230 #endif /* USE_ITT_BUILD */
1231 #if OMPT_SUPPORT && OMPT_OPTIONAL
1232  if (ompt_enabled.enabled) {
1233  /* OMPT state update */
1234  ti.state = prev_state;
1235  ti.wait_id = 0;
1236 
1237  /* OMPT event callback */
1238  if (ompt_enabled.ompt_callback_mutex_acquired) {
1239  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1240  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1241  }
1242  }
1243 #endif
1244  KMP_POP_PARTITIONED_TIMER();
1245 
1246  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1247  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1248 #endif // KMP_USE_DYNAMIC_LOCK
1249 }
1250 
1251 #if KMP_USE_DYNAMIC_LOCK
1252 
1253 // Converts the given hint to an internal lock implementation
1254 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1255 #if KMP_USE_TSX
1256 #define KMP_TSX_LOCK(seq) lockseq_##seq
1257 #else
1258 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1259 #endif
1260 
1261 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1262 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1263 #else
1264 #define KMP_CPUINFO_RTM 0
1265 #endif
1266 
1267  // Hints that do not require further logic
1268  if (hint & kmp_lock_hint_hle)
1269  return KMP_TSX_LOCK(hle);
1270  if (hint & kmp_lock_hint_rtm)
1271  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1272  if (hint & kmp_lock_hint_adaptive)
1273  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1274 
1275  // Rule out conflicting hints first by returning the default lock
1276  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1277  return __kmp_user_lock_seq;
1278  if ((hint & omp_lock_hint_speculative) &&
1279  (hint & omp_lock_hint_nonspeculative))
1280  return __kmp_user_lock_seq;
1281 
1282  // Do not even consider speculation when it appears to be contended
1283  if (hint & omp_lock_hint_contended)
1284  return lockseq_queuing;
1285 
1286  // Uncontended lock without speculation
1287  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1288  return lockseq_tas;
1289 
1290  // HLE lock for speculation
1291  if (hint & omp_lock_hint_speculative)
1292  return KMP_TSX_LOCK(hle);
1293 
1294  return __kmp_user_lock_seq;
1295 }
1296 
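// A usage sketch (standard OpenMP API, not code from this file): lock hints
// normally reach this mapping through omp_init_lock_with_hint() or the `hint`
// clause on `critical`; for example:
#if 0
#include <omp.h>

omp_lock_t lk;

void init_speculative_lock(void) {
  // May map to an HLE/RTM-based lock on TSX-capable hardware; otherwise it
  // falls back to the default user lock sequence (see the mapping above).
  omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
}
#endif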
1297 #if OMPT_SUPPORT && OMPT_OPTIONAL
1298 #if KMP_USE_DYNAMIC_LOCK
1299 static kmp_mutex_impl_t
1300 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1301  if (user_lock) {
1302  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1303  case 0:
1304  break;
1305 #if KMP_USE_FUTEX
1306  case locktag_futex:
1307  return kmp_mutex_impl_queuing;
1308 #endif
1309  case locktag_tas:
1310  return kmp_mutex_impl_spin;
1311 #if KMP_USE_TSX
1312  case locktag_hle:
1313  return kmp_mutex_impl_speculative;
1314 #endif
1315  default:
1316  return kmp_mutex_impl_none;
1317  }
1318  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1319  }
1320  KMP_ASSERT(ilock);
1321  switch (ilock->type) {
1322 #if KMP_USE_TSX
1323  case locktag_adaptive:
1324  case locktag_rtm:
1325  return kmp_mutex_impl_speculative;
1326 #endif
1327  case locktag_nested_tas:
1328  return kmp_mutex_impl_spin;
1329 #if KMP_USE_FUTEX
1330  case locktag_nested_futex:
1331 #endif
1332  case locktag_ticket:
1333  case locktag_queuing:
1334  case locktag_drdpa:
1335  case locktag_nested_ticket:
1336  case locktag_nested_queuing:
1337  case locktag_nested_drdpa:
1338  return kmp_mutex_impl_queuing;
1339  default:
1340  return kmp_mutex_impl_none;
1341  }
1342 }
1343 #else
1344 // For locks without dynamic binding
1345 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1346  switch (__kmp_user_lock_kind) {
1347  case lk_tas:
1348  return kmp_mutex_impl_spin;
1349 #if KMP_USE_FUTEX
1350  case lk_futex:
1351 #endif
1352  case lk_ticket:
1353  case lk_queuing:
1354  case lk_drdpa:
1355  return kmp_mutex_impl_queuing;
1356 #if KMP_USE_TSX
1357  case lk_hle:
1358  case lk_rtm:
1359  case lk_adaptive:
1360  return kmp_mutex_impl_speculative;
1361 #endif
1362  default:
1363  return kmp_mutex_impl_none;
1364  }
1365 }
1366 #endif // KMP_USE_DYNAMIC_LOCK
1367 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1368 
1382 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1383  kmp_critical_name *crit, uint32_t hint) {
1384  KMP_COUNT_BLOCK(OMP_CRITICAL);
1385  kmp_user_lock_p lck;
1386 #if OMPT_SUPPORT && OMPT_OPTIONAL
1387  ompt_state_t prev_state = ompt_state_undefined;
1388  ompt_thread_info_t ti;
1389  // This is the case if called from __kmpc_critical:
1390  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1391  if (!codeptr)
1392  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1393 #endif
1394 
1395  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1396 
1397  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1398  // Check if it is initialized.
1399  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1400  if (*lk == 0) {
1401  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1402  if (KMP_IS_D_LOCK(lckseq)) {
1403  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1404  KMP_GET_D_TAG(lckseq));
1405  } else {
1406  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1407  }
1408  }
1409  // Branch for accessing the actual lock object and set operation. This
1410  // branching is inevitable since this lock initialization does not follow the
1411  // normal dispatch path (lock table is not used).
1412  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1413  lck = (kmp_user_lock_p)lk;
1414  if (__kmp_env_consistency_check) {
1415  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1416  __kmp_map_hint_to_lock(hint));
1417  }
1418 #if USE_ITT_BUILD
1419  __kmp_itt_critical_acquiring(lck);
1420 #endif
1421 #if OMPT_SUPPORT && OMPT_OPTIONAL
1422  if (ompt_enabled.enabled) {
1423  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1424  /* OMPT state update */
1425  prev_state = ti.state;
1426  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1427  ti.state = ompt_state_wait_critical;
1428 
1429  /* OMPT event callback */
1430  if (ompt_enabled.ompt_callback_mutex_acquire) {
1431  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1432  ompt_mutex_critical, (unsigned int)hint,
1433  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1434  codeptr);
1435  }
1436  }
1437 #endif
1438 #if KMP_USE_INLINED_TAS
1439  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1440  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1441  } else
1442 #elif KMP_USE_INLINED_FUTEX
1443  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1444  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1445  } else
1446 #endif
1447  {
1448  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1449  }
1450  } else {
1451  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1452  lck = ilk->lock;
1453  if (__kmp_env_consistency_check) {
1454  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1455  __kmp_map_hint_to_lock(hint));
1456  }
1457 #if USE_ITT_BUILD
1458  __kmp_itt_critical_acquiring(lck);
1459 #endif
1460 #if OMPT_SUPPORT && OMPT_OPTIONAL
1461  if (ompt_enabled.enabled) {
1462  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1463  /* OMPT state update */
1464  prev_state = ti.state;
1465  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1466  ti.state = ompt_state_wait_critical;
1467 
1468  /* OMPT event callback */
1469  if (ompt_enabled.ompt_callback_mutex_acquire) {
1470  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1471  ompt_mutex_critical, (unsigned int)hint,
1472  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1473  codeptr);
1474  }
1475  }
1476 #endif
1477  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1478  }
1479  KMP_POP_PARTITIONED_TIMER();
1480 
1481 #if USE_ITT_BUILD
1482  __kmp_itt_critical_acquired(lck);
1483 #endif /* USE_ITT_BUILD */
1484 #if OMPT_SUPPORT && OMPT_OPTIONAL
1485  if (ompt_enabled.enabled) {
1486  /* OMPT state update */
1487  ti.state = prev_state;
1488  ti.wait_id = 0;
1489 
1490  /* OMPT event callback */
1491  if (ompt_enabled.ompt_callback_mutex_acquired) {
1492  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1493  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1494  }
1495  }
1496 #endif
1497 
1498  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1499  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1500 } // __kmpc_critical_with_hint
1501 
1502 #endif // KMP_USE_DYNAMIC_LOCK
1503 
1513 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1514  kmp_critical_name *crit) {
1515  kmp_user_lock_p lck;
1516 
1517  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1518 
1519 #if KMP_USE_DYNAMIC_LOCK
1520  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1521  lck = (kmp_user_lock_p)crit;
1522  KMP_ASSERT(lck != NULL);
1523  if (__kmp_env_consistency_check) {
1524  __kmp_pop_sync(global_tid, ct_critical, loc);
1525  }
1526 #if USE_ITT_BUILD
1527  __kmp_itt_critical_releasing(lck);
1528 #endif
1529 #if KMP_USE_INLINED_TAS
1530  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1531  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1532  } else
1533 #elif KMP_USE_INLINED_FUTEX
1534  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1535  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1536  } else
1537 #endif
1538  {
1539  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1540  }
1541  } else {
1542  kmp_indirect_lock_t *ilk =
1543  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1544  KMP_ASSERT(ilk != NULL);
1545  lck = ilk->lock;
1546  if (__kmp_env_consistency_check) {
1547  __kmp_pop_sync(global_tid, ct_critical, loc);
1548  }
1549 #if USE_ITT_BUILD
1550  __kmp_itt_critical_releasing(lck);
1551 #endif
1552  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1553  }
1554 
1555 #else // KMP_USE_DYNAMIC_LOCK
1556 
1557  if ((__kmp_user_lock_kind == lk_tas) &&
1558  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1559  lck = (kmp_user_lock_p)crit;
1560  }
1561 #if KMP_USE_FUTEX
1562  else if ((__kmp_user_lock_kind == lk_futex) &&
1563  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1564  lck = (kmp_user_lock_p)crit;
1565  }
1566 #endif
1567  else { // ticket, queuing or drdpa
1568  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1569  }
1570 
1571  KMP_ASSERT(lck != NULL);
1572 
1573  if (__kmp_env_consistency_check)
1574  __kmp_pop_sync(global_tid, ct_critical, loc);
1575 
1576 #if USE_ITT_BUILD
1577  __kmp_itt_critical_releasing(lck);
1578 #endif /* USE_ITT_BUILD */
1579  // Value of 'crit' should be good for using as a critical_id of the critical
1580  // section directive.
1581  __kmp_release_user_lock_with_checks(lck, global_tid);
1582 
1583 #endif // KMP_USE_DYNAMIC_LOCK
1584 
1585 #if OMPT_SUPPORT && OMPT_OPTIONAL
1586  /* OMPT release event triggers after lock is released; place here to trigger
1587  * for all #if branches */
1588  OMPT_STORE_RETURN_ADDRESS(global_tid);
1589  if (ompt_enabled.ompt_callback_mutex_released) {
1590  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1591  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1592  OMPT_LOAD_RETURN_ADDRESS(0));
1593  }
1594 #endif
1595 
1596  KMP_POP_PARTITIONED_TIMER();
1597  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1598 }
1599 
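// A minimal sketch (assumed lowering, not code from this file): a named
// `#pragma omp critical(name)` is typically lowered to a pair of calls that
// share one statically allocated kmp_critical_name per name; `crit_name`
// below is a hypothetical name.
#if 0
static kmp_critical_name crit_name; // zero-initialized, shared per name

void lowered_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &crit_name);
  // ... critical section body ...
  __kmpc_end_critical(loc, gtid, &crit_name);
}
#endif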
1609 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1610  int status;
1611 
1612  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1613 
1614  if (!TCR_4(__kmp_init_parallel))
1615  __kmp_parallel_initialize();
1616 
1617  __kmp_resume_if_soft_paused();
1618 
1619  if (__kmp_env_consistency_check)
1620  __kmp_check_barrier(global_tid, ct_barrier, loc);
1621 
1622 #if OMPT_SUPPORT
1623  ompt_frame_t *ompt_frame;
1624  if (ompt_enabled.enabled) {
1625  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1626  if (ompt_frame->enter_frame.ptr == NULL)
1627  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1628  OMPT_STORE_RETURN_ADDRESS(global_tid);
1629  }
1630 #endif
1631 #if USE_ITT_NOTIFY
1632  __kmp_threads[global_tid]->th.th_ident = loc;
1633 #endif
1634  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1635 #if OMPT_SUPPORT && OMPT_OPTIONAL
1636  if (ompt_enabled.enabled) {
1637  ompt_frame->enter_frame = ompt_data_none;
1638  }
1639 #endif
1640 
1641  return (status != 0) ? 0 : 1;
1642 }
1643 
1653 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1654  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1655 
1656  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1657 }
1658 
1669 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1670  kmp_int32 ret;
1671 
1672  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1673 
1674  if (!TCR_4(__kmp_init_parallel))
1675  __kmp_parallel_initialize();
1676 
1677  __kmp_resume_if_soft_paused();
1678 
1679  if (__kmp_env_consistency_check) {
1680  if (loc == 0) {
1681  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1682  }
1683  __kmp_check_barrier(global_tid, ct_barrier, loc);
1684  }
1685 
1686 #if OMPT_SUPPORT
1687  ompt_frame_t *ompt_frame;
1688  if (ompt_enabled.enabled) {
1689  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1690  if (ompt_frame->enter_frame.ptr == NULL)
1691  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1692  OMPT_STORE_RETURN_ADDRESS(global_tid);
1693  }
1694 #endif
1695 #if USE_ITT_NOTIFY
1696  __kmp_threads[global_tid]->th.th_ident = loc;
1697 #endif
1698  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1699 #if OMPT_SUPPORT && OMPT_OPTIONAL
1700  if (ompt_enabled.enabled) {
1701  ompt_frame->enter_frame = ompt_data_none;
1702  }
1703 #endif
1704 
1705  ret = __kmpc_master(loc, global_tid);
1706 
1707  if (__kmp_env_consistency_check) {
1708  /* there is no __kmpc_end_master call, so the (stats) */
1709  /* actions of __kmpc_end_master are done here */
1710 
1711  if (global_tid < 0) {
1712  KMP_WARNING(ThreadIdentInvalid);
1713  }
1714  if (ret) {
1715  /* only one thread should do the pop since only */
1716  /* one did the push (see __kmpc_master()) */
1717 
1718  __kmp_pop_sync(global_tid, ct_master, loc);
1719  }
1720  }
1721 
1722  return (ret);
1723 }
1724 
1725 /* The BARRIER for a SINGLE process section is always explicit */
1737 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1738  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1739 
1740  if (rc) {
1741  // We are going to execute the single statement, so we should count it.
1742  KMP_COUNT_BLOCK(OMP_SINGLE);
1743  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1744  }
1745 
1746 #if OMPT_SUPPORT && OMPT_OPTIONAL
1747  kmp_info_t *this_thr = __kmp_threads[global_tid];
1748  kmp_team_t *team = this_thr->th.th_team;
1749  int tid = __kmp_tid_from_gtid(global_tid);
1750 
1751  if (ompt_enabled.enabled) {
1752  if (rc) {
1753  if (ompt_enabled.ompt_callback_work) {
1754  ompt_callbacks.ompt_callback(ompt_callback_work)(
1755  ompt_work_single_executor, ompt_scope_begin,
1756  &(team->t.ompt_team_info.parallel_data),
1757  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1758  1, OMPT_GET_RETURN_ADDRESS(0));
1759  }
1760  } else {
1761  if (ompt_enabled.ompt_callback_work) {
1762  ompt_callbacks.ompt_callback(ompt_callback_work)(
1763  ompt_work_single_other, ompt_scope_begin,
1764  &(team->t.ompt_team_info.parallel_data),
1765  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1766  1, OMPT_GET_RETURN_ADDRESS(0));
1767  ompt_callbacks.ompt_callback(ompt_callback_work)(
1768  ompt_work_single_other, ompt_scope_end,
1769  &(team->t.ompt_team_info.parallel_data),
1770  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1771  1, OMPT_GET_RETURN_ADDRESS(0));
1772  }
1773  }
1774  }
1775 #endif
1776 
1777  return rc;
1778 }
1779 
1789 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1790  __kmp_exit_single(global_tid);
1791  KMP_POP_PARTITIONED_TIMER();
1792 
1793 #if OMPT_SUPPORT && OMPT_OPTIONAL
1794  kmp_info_t *this_thr = __kmp_threads[global_tid];
1795  kmp_team_t *team = this_thr->th.th_team;
1796  int tid = __kmp_tid_from_gtid(global_tid);
1797 
1798  if (ompt_enabled.ompt_callback_work) {
1799  ompt_callbacks.ompt_callback(ompt_callback_work)(
1800  ompt_work_single_executor, ompt_scope_end,
1801  &(team->t.ompt_team_info.parallel_data),
1802  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1803  OMPT_GET_RETURN_ADDRESS(0));
1804  }
1805 #endif
1806 }
1807 
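// A minimal sketch (assumed lowering, not code from this file): `#pragma omp
// single` runs the body only on the thread for which __kmpc_single() returns
// nonzero, followed by the implicit barrier unless `nowait` is given.
#if 0
void lowered_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    // ... single body, executed by exactly one thread ...
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when the construct has nowait
}
#endif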
1815 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1816  KMP_POP_PARTITIONED_TIMER();
1817  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1818 
1819 #if OMPT_SUPPORT && OMPT_OPTIONAL
1820  if (ompt_enabled.ompt_callback_work) {
1821  ompt_work_t ompt_work_type = ompt_work_loop;
1822  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1823  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1824  // Determine workshare type
1825  if (loc != NULL) {
1826  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1827  ompt_work_type = ompt_work_loop;
1828  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1829  ompt_work_type = ompt_work_sections;
1830  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1831  ompt_work_type = ompt_work_distribute;
1832  } else {
1833  // use the default set above;
1834  // a warning about this case is provided in __kmpc_for_static_init
1835  }
1836  KMP_DEBUG_ASSERT(ompt_work_type);
1837  }
1838  ompt_callbacks.ompt_callback(ompt_callback_work)(
1839  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1840  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1841  }
1842 #endif
1843  if (__kmp_env_consistency_check)
1844  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1845 }
1846 
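// A minimal sketch (assumed lowering, not code from this file): a statically
// scheduled worksharing loop computes its bounds with the matching
// __kmpc_for_static_init_* entry point (defined elsewhere in the runtime),
// runs its chunk, and then calls __kmpc_for_static_fini(); the locals and the
// exact init signature below are illustrative assumptions.
#if 0
void lowered_static_for(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lb = 0, ub = 99, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb, &ub, &stride,
                           /*incr=*/1, /*chunk=*/0);
  for (kmp_int32 i = lb; i <= ub; ++i) {
    // ... loop body ...
  }
  __kmpc_for_static_fini(loc, gtid);
}
#endif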
1847 // User routines which take C-style arguments (call by value)
1848 // different from the Fortran equivalent routines
1849 
1850 void ompc_set_num_threads(int arg) {
1851  // !!!!! TODO: check the per-task binding
1852  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1853 }
1854 
1855 void ompc_set_dynamic(int flag) {
1856  kmp_info_t *thread;
1857 
1858  /* For the thread-private implementation of the internal controls */
1859  thread = __kmp_entry_thread();
1860 
1861  __kmp_save_internal_controls(thread);
1862 
1863  set__dynamic(thread, flag ? TRUE : FALSE);
1864 }
1865 
1866 void ompc_set_nested(int flag) {
1867  kmp_info_t *thread;
1868 
1869  /* For the thread-private internal controls implementation */
1870  thread = __kmp_entry_thread();
1871 
1872  __kmp_save_internal_controls(thread);
1873 
1874  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1875 }
1876 
1877 void ompc_set_max_active_levels(int max_active_levels) {
1878  /* TO DO */
1879  /* we want per-task implementation of this internal control */
1880 
1881  /* For the per-thread internal controls implementation */
1882  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1883 }
1884 
1885 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1886  // !!!!! TODO: check the per-task binding
1887  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1888 }
1889 
1890 int ompc_get_ancestor_thread_num(int level) {
1891  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1892 }
1893 
1894 int ompc_get_team_size(int level) {
1895  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1896 }
1897 
1898 /* OpenMP 5.0 Affinity Format API */
1899 
1900 void ompc_set_affinity_format(char const *format) {
1901  if (!__kmp_init_serial) {
1902  __kmp_serial_initialize();
1903  }
1904  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1905  format, KMP_STRLEN(format) + 1);
1906 }
1907 
1908 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1909  size_t format_size;
1910  if (!__kmp_init_serial) {
1911  __kmp_serial_initialize();
1912  }
1913  format_size = KMP_STRLEN(__kmp_affinity_format);
1914  if (buffer && size) {
1915  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1916  format_size + 1);
1917  }
1918  return format_size;
1919 }
1920 
1921 void ompc_display_affinity(char const *format) {
1922  int gtid;
1923  if (!TCR_4(__kmp_init_middle)) {
1924  __kmp_middle_initialize();
1925  }
1926  gtid = __kmp_get_gtid();
1927  __kmp_aux_display_affinity(gtid, format);
1928 }
1929 
1930 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1931  char const *format) {
1932  int gtid;
1933  size_t num_required;
1934  kmp_str_buf_t capture_buf;
1935  if (!TCR_4(__kmp_init_middle)) {
1936  __kmp_middle_initialize();
1937  }
1938  gtid = __kmp_get_gtid();
1939  __kmp_str_buf_init(&capture_buf);
1940  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1941  if (buffer && buf_size) {
1942  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1943  capture_buf.used + 1);
1944  }
1945  __kmp_str_buf_free(&capture_buf);
1946  return num_required;
1947 }
1948 
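// A usage sketch (standard OpenMP 5.0 API, not code from this file): the
// omp_* affinity-format routines declared in omp.h are the user-facing
// counterparts of the ompc_* entry points above.
#if 0
#include <omp.h>
#include <stdio.h>

void show_affinity(void) {
  omp_set_affinity_format("T#%n on host %H, affinity %A");
  #pragma omp parallel
  {
    char buf[256];
    omp_capture_affinity(buf, sizeof(buf), NULL); // NULL => current format
    printf("%s\n", buf);
  }
}
#endif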
1949 void kmpc_set_stacksize(int arg) {
1950  // __kmp_aux_set_stacksize initializes the library if needed
1951  __kmp_aux_set_stacksize(arg);
1952 }
1953 
1954 void kmpc_set_stacksize_s(size_t arg) {
1955  // __kmp_aux_set_stacksize initializes the library if needed
1956  __kmp_aux_set_stacksize(arg);
1957 }
1958 
1959 void kmpc_set_blocktime(int arg) {
1960  int gtid, tid;
1961  kmp_info_t *thread;
1962 
1963  gtid = __kmp_entry_gtid();
1964  tid = __kmp_tid_from_gtid(gtid);
1965  thread = __kmp_thread_from_gtid(gtid);
1966 
1967  __kmp_aux_set_blocktime(arg, thread, tid);
1968 }
1969 
1970 void kmpc_set_library(int arg) {
1971  // __kmp_user_set_library initializes the library if needed
1972  __kmp_user_set_library((enum library_type)arg);
1973 }
1974 
1975 void kmpc_set_defaults(char const *str) {
1976  // __kmp_aux_set_defaults initializes the library if needed
1977  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1978 }
1979 
1980 void kmpc_set_disp_num_buffers(int arg) {
 1981  // Ignore this call after initialization, because some teams may have
 1982  // already allocated dispatch buffers.
1983  if (__kmp_init_serial == 0 && arg > 0)
1984  __kmp_dispatch_num_buffers = arg;
1985 }
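
// Illustrative usage of the kmp_* extension API that reaches the kmpc_set_*
// entries above (a hedged sketch; the prototypes are assumed to come from the
// runtime's omp.h, and the values shown are examples only):
//
//   #include <omp.h>
//
//   void tune_runtime(void) {
//     kmp_set_stacksize_s(4 * 1024 * 1024); // 4 MiB worker stacks
//     kmp_set_blocktime(0);                 // idle workers sleep immediately
//     kmp_set_defaults("KMP_AFFINITY=compact"); // same syntax as the env var
//   }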
1986 
1987 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1988 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1989  return -1;
1990 #else
1991  if (!TCR_4(__kmp_init_middle)) {
1992  __kmp_middle_initialize();
1993  }
1994  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1995 #endif
1996 }
1997 
1998 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1999 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2000  return -1;
2001 #else
2002  if (!TCR_4(__kmp_init_middle)) {
2003  __kmp_middle_initialize();
2004  }
2005  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2006 #endif
2007 }
2008 
2009 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2010 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2011  return -1;
2012 #else
2013  if (!TCR_4(__kmp_init_middle)) {
2014  __kmp_middle_initialize();
2015  }
2016  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2017 #endif
2018 }
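
// Illustrative usage of the affinity-mask extension serviced by the
// kmpc_*_affinity_mask_proc entries above (a hedged sketch; the kmp_* mask
// API is assumed to be declared in the runtime's omp.h, and a nonzero return
// is treated as "unsupported or failed"):
//
//   #include <omp.h>
//
//   int pin_current_thread(int proc) {
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     if (kmp_set_affinity_mask_proc(proc, &mask) != 0) {
//       kmp_destroy_affinity_mask(&mask);
//       return -1; // proc out of range or affinity not supported
//     }
//     int rc = kmp_set_affinity(&mask); // 0 on success
//     kmp_destroy_affinity_mask(&mask);
//     return rc;
//   }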
2019 
2020 /* -------------------------------------------------------------------------- */
2065 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2066  void *cpy_data, void (*cpy_func)(void *, void *),
2067  kmp_int32 didit) {
2068  void **data_ptr;
2069 
2070  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2071 
2072  KMP_MB();
2073 
2074  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2075 
2076  if (__kmp_env_consistency_check) {
2077  if (loc == 0) {
2078  KMP_WARNING(ConstructIdentInvalid);
2079  }
2080  }
2081 
2082  // ToDo: Optimize the following two barriers into some kind of split barrier
2083 
2084  if (didit)
2085  *data_ptr = cpy_data;
2086 
2087 #if OMPT_SUPPORT
2088  ompt_frame_t *ompt_frame;
2089  if (ompt_enabled.enabled) {
2090  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2091  if (ompt_frame->enter_frame.ptr == NULL)
2092  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2093  OMPT_STORE_RETURN_ADDRESS(gtid);
2094  }
2095 #endif
2096 /* This barrier is not a barrier region boundary */
2097 #if USE_ITT_NOTIFY
2098  __kmp_threads[gtid]->th.th_ident = loc;
2099 #endif
2100  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2101 
2102  if (!didit)
2103  (*cpy_func)(cpy_data, *data_ptr);
2104 
 2105 // Consider the next barrier a user-visible barrier for barrier region
 2106 // boundaries. Nesting checks are already handled by the single construct checks.
2107 
2108 #if OMPT_SUPPORT
2109  if (ompt_enabled.enabled) {
2110  OMPT_STORE_RETURN_ADDRESS(gtid);
2111  }
2112 #endif
2113 #if USE_ITT_NOTIFY
2114  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2115 // tasks can overwrite the location)
2116 #endif
2117  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2118 #if OMPT_SUPPORT && OMPT_OPTIONAL
2119  if (ompt_enabled.enabled) {
2120  ompt_frame->enter_frame = ompt_data_none;
2121  }
2122 #endif
2123 }
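
// A hedged sketch of how a compiler might lower
//   #pragma omp single copyprivate(x)
// onto __kmpc_copyprivate() above. The helper names are illustrative, not the
// code any particular compiler emits; __kmpc_single()/__kmpc_end_single() are
// the runtime's own single-construct entry points.
//
//   static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
//
//   static void lowered_single(ident_t *loc, kmp_int32 gtid, int *x) {
//     kmp_int32 didit = 0;
//     if (__kmpc_single(loc, gtid)) {
//       *x = 42; // body of the single region, executed by one thread
//       didit = 1;
//       __kmpc_end_single(loc, gtid);
//     }
//     // every thread calls copyprivate; the thread with didit != 0 donates its
//     // data and the others receive it through copy_x()
//     __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_x, didit);
//   }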
2124 
2125 /* -------------------------------------------------------------------------- */
2126 
2127 #define INIT_LOCK __kmp_init_user_lock_with_checks
2128 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2129 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2130 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2131 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2132 #define ACQUIRE_NESTED_LOCK_TIMED \
2133  __kmp_acquire_nested_user_lock_with_checks_timed
2134 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2135 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2136 #define TEST_LOCK __kmp_test_user_lock_with_checks
2137 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2138 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2139 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2140 
2141 // TODO: Make check abort messages use location info & pass it into
2142 // with_checks routines
2143 
2144 #if KMP_USE_DYNAMIC_LOCK
2145 
2146 // internal lock initializer
2147 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2148  kmp_dyna_lockseq_t seq) {
2149  if (KMP_IS_D_LOCK(seq)) {
2150  KMP_INIT_D_LOCK(lock, seq);
2151 #if USE_ITT_BUILD
2152  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2153 #endif
2154  } else {
2155  KMP_INIT_I_LOCK(lock, seq);
2156 #if USE_ITT_BUILD
2157  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2158  __kmp_itt_lock_creating(ilk->lock, loc);
2159 #endif
2160  }
2161 }
2162 
2163 // internal nest lock initializer
2164 static __forceinline void
2165 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2166  kmp_dyna_lockseq_t seq) {
2167 #if KMP_USE_TSX
2168  // Don't have nested lock implementation for speculative locks
2169  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2170  seq = __kmp_user_lock_seq;
2171 #endif
2172  switch (seq) {
2173  case lockseq_tas:
2174  seq = lockseq_nested_tas;
2175  break;
2176 #if KMP_USE_FUTEX
2177  case lockseq_futex:
2178  seq = lockseq_nested_futex;
2179  break;
2180 #endif
2181  case lockseq_ticket:
2182  seq = lockseq_nested_ticket;
2183  break;
2184  case lockseq_queuing:
2185  seq = lockseq_nested_queuing;
2186  break;
2187  case lockseq_drdpa:
2188  seq = lockseq_nested_drdpa;
2189  break;
2190  default:
2191  seq = lockseq_nested_queuing;
2192  }
2193  KMP_INIT_I_LOCK(lock, seq);
2194 #if USE_ITT_BUILD
2195  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2196  __kmp_itt_lock_creating(ilk->lock, loc);
2197 #endif
2198 }
2199 
2200 /* initialize the lock with a hint */
2201 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2202  uintptr_t hint) {
2203  KMP_DEBUG_ASSERT(__kmp_init_serial);
2204  if (__kmp_env_consistency_check && user_lock == NULL) {
2205  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2206  }
2207 
2208  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2209 
2210 #if OMPT_SUPPORT && OMPT_OPTIONAL
2211  // This is the case, if called from omp_init_lock_with_hint:
2212  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2213  if (!codeptr)
2214  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2215  if (ompt_enabled.ompt_callback_lock_init) {
2216  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2217  ompt_mutex_lock, (omp_lock_hint_t)hint,
2218  __ompt_get_mutex_impl_type(user_lock),
2219  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2220  }
2221 #endif
2222 }
2223 
2224 /* initialize the lock with a hint */
2225 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2226  void **user_lock, uintptr_t hint) {
2227  KMP_DEBUG_ASSERT(__kmp_init_serial);
2228  if (__kmp_env_consistency_check && user_lock == NULL) {
2229  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2230  }
2231 
2232  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2233 
2234 #if OMPT_SUPPORT && OMPT_OPTIONAL
2235  // This is the case, if called from omp_init_lock_with_hint:
2236  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2237  if (!codeptr)
2238  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2239  if (ompt_enabled.ompt_callback_lock_init) {
2240  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2241  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2242  __ompt_get_mutex_impl_type(user_lock),
2243  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2244  }
2245 #endif
2246 }
2247 
2248 #endif // KMP_USE_DYNAMIC_LOCK
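
// Illustrative user-level call that reaches the *_with_hint initializers
// above (a minimal sketch; when the requested speculation is unavailable the
// runtime silently falls back to a regular lock):
//
//   #include <omp.h>
//
//   static omp_lock_t spec_lock;
//
//   void init_speculative_lock(void) {
//     omp_init_lock_with_hint(&spec_lock, omp_lock_hint_speculative);
//   }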
2249 
2250 /* initialize the lock */
2251 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2252 #if KMP_USE_DYNAMIC_LOCK
2253 
2254  KMP_DEBUG_ASSERT(__kmp_init_serial);
2255  if (__kmp_env_consistency_check && user_lock == NULL) {
2256  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2257  }
2258  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2259 
2260 #if OMPT_SUPPORT && OMPT_OPTIONAL
2261  // This is the case, if called from omp_init_lock_with_hint:
2262  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2263  if (!codeptr)
2264  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2265  if (ompt_enabled.ompt_callback_lock_init) {
2266  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2267  ompt_mutex_lock, omp_lock_hint_none,
2268  __ompt_get_mutex_impl_type(user_lock),
2269  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2270  }
2271 #endif
2272 
2273 #else // KMP_USE_DYNAMIC_LOCK
2274 
2275  static char const *const func = "omp_init_lock";
2276  kmp_user_lock_p lck;
2277  KMP_DEBUG_ASSERT(__kmp_init_serial);
2278 
2279  if (__kmp_env_consistency_check) {
2280  if (user_lock == NULL) {
2281  KMP_FATAL(LockIsUninitialized, func);
2282  }
2283  }
2284 
2285  KMP_CHECK_USER_LOCK_INIT();
2286 
2287  if ((__kmp_user_lock_kind == lk_tas) &&
2288  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2289  lck = (kmp_user_lock_p)user_lock;
2290  }
2291 #if KMP_USE_FUTEX
2292  else if ((__kmp_user_lock_kind == lk_futex) &&
2293  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2294  lck = (kmp_user_lock_p)user_lock;
2295  }
2296 #endif
2297  else {
2298  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2299  }
2300  INIT_LOCK(lck);
2301  __kmp_set_user_lock_location(lck, loc);
2302 
2303 #if OMPT_SUPPORT && OMPT_OPTIONAL
2304  // This is the case, if called from omp_init_lock_with_hint:
2305  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2306  if (!codeptr)
2307  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2308  if (ompt_enabled.ompt_callback_lock_init) {
2309  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2310  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2311  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2312  }
2313 #endif
2314 
2315 #if USE_ITT_BUILD
2316  __kmp_itt_lock_creating(lck);
2317 #endif /* USE_ITT_BUILD */
2318 
2319 #endif // KMP_USE_DYNAMIC_LOCK
2320 } // __kmpc_init_lock
2321 
2322 /* initialize the lock */
2323 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2324 #if KMP_USE_DYNAMIC_LOCK
2325 
2326  KMP_DEBUG_ASSERT(__kmp_init_serial);
2327  if (__kmp_env_consistency_check && user_lock == NULL) {
2328  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2329  }
2330  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2331 
2332 #if OMPT_SUPPORT && OMPT_OPTIONAL
2333  // This is the case, if called from omp_init_lock_with_hint:
2334  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2335  if (!codeptr)
2336  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2337  if (ompt_enabled.ompt_callback_lock_init) {
2338  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2339  ompt_mutex_nest_lock, omp_lock_hint_none,
2340  __ompt_get_mutex_impl_type(user_lock),
2341  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2342  }
2343 #endif
2344 
2345 #else // KMP_USE_DYNAMIC_LOCK
2346 
2347  static char const *const func = "omp_init_nest_lock";
2348  kmp_user_lock_p lck;
2349  KMP_DEBUG_ASSERT(__kmp_init_serial);
2350 
2351  if (__kmp_env_consistency_check) {
2352  if (user_lock == NULL) {
2353  KMP_FATAL(LockIsUninitialized, func);
2354  }
2355  }
2356 
2357  KMP_CHECK_USER_LOCK_INIT();
2358 
2359  if ((__kmp_user_lock_kind == lk_tas) &&
2360  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2361  OMP_NEST_LOCK_T_SIZE)) {
2362  lck = (kmp_user_lock_p)user_lock;
2363  }
2364 #if KMP_USE_FUTEX
2365  else if ((__kmp_user_lock_kind == lk_futex) &&
2366  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2367  OMP_NEST_LOCK_T_SIZE)) {
2368  lck = (kmp_user_lock_p)user_lock;
2369  }
2370 #endif
2371  else {
2372  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2373  }
2374 
2375  INIT_NESTED_LOCK(lck);
2376  __kmp_set_user_lock_location(lck, loc);
2377 
2378 #if OMPT_SUPPORT && OMPT_OPTIONAL
2379  // This is the case, if called from omp_init_lock_with_hint:
2380  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2381  if (!codeptr)
2382  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2383  if (ompt_enabled.ompt_callback_lock_init) {
2384  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2385  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2386  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2387  }
2388 #endif
2389 
2390 #if USE_ITT_BUILD
2391  __kmp_itt_lock_creating(lck);
2392 #endif /* USE_ITT_BUILD */
2393 
2394 #endif // KMP_USE_DYNAMIC_LOCK
2395 } // __kmpc_init_nest_lock
2396 
2397 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2398 #if KMP_USE_DYNAMIC_LOCK
2399 
2400 #if USE_ITT_BUILD
2401  kmp_user_lock_p lck;
2402  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2403  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2404  } else {
2405  lck = (kmp_user_lock_p)user_lock;
2406  }
2407  __kmp_itt_lock_destroyed(lck);
2408 #endif
2409 #if OMPT_SUPPORT && OMPT_OPTIONAL
2410  // This is the case, if called from omp_init_lock_with_hint:
2411  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2412  if (!codeptr)
2413  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2414  if (ompt_enabled.ompt_callback_lock_destroy) {
2415  kmp_user_lock_p lck;
2416  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2417  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2418  } else {
2419  lck = (kmp_user_lock_p)user_lock;
2420  }
2421  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2422  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2423  }
2424 #endif
2425  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2426 #else
2427  kmp_user_lock_p lck;
2428 
2429  if ((__kmp_user_lock_kind == lk_tas) &&
2430  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2431  lck = (kmp_user_lock_p)user_lock;
2432  }
2433 #if KMP_USE_FUTEX
2434  else if ((__kmp_user_lock_kind == lk_futex) &&
2435  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2436  lck = (kmp_user_lock_p)user_lock;
2437  }
2438 #endif
2439  else {
2440  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2441  }
2442 
2443 #if OMPT_SUPPORT && OMPT_OPTIONAL
2444  // This is the case, if called from omp_init_lock_with_hint:
2445  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2446  if (!codeptr)
2447  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2448  if (ompt_enabled.ompt_callback_lock_destroy) {
2449  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2450  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2451  }
2452 #endif
2453 
2454 #if USE_ITT_BUILD
2455  __kmp_itt_lock_destroyed(lck);
2456 #endif /* USE_ITT_BUILD */
2457  DESTROY_LOCK(lck);
2458 
2459  if ((__kmp_user_lock_kind == lk_tas) &&
2460  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2461  ;
2462  }
2463 #if KMP_USE_FUTEX
2464  else if ((__kmp_user_lock_kind == lk_futex) &&
2465  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2466  ;
2467  }
2468 #endif
2469  else {
2470  __kmp_user_lock_free(user_lock, gtid, lck);
2471  }
2472 #endif // KMP_USE_DYNAMIC_LOCK
2473 } // __kmpc_destroy_lock
2474 
2475 /* destroy the lock */
2476 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2477 #if KMP_USE_DYNAMIC_LOCK
2478 
2479 #if USE_ITT_BUILD
2480  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2481  __kmp_itt_lock_destroyed(ilk->lock);
2482 #endif
2483 #if OMPT_SUPPORT && OMPT_OPTIONAL
2484  // This is the case, if called from omp_init_lock_with_hint:
2485  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2486  if (!codeptr)
2487  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2488  if (ompt_enabled.ompt_callback_lock_destroy) {
2489  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2490  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2491  }
2492 #endif
2493  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2494 
2495 #else // KMP_USE_DYNAMIC_LOCK
2496 
2497  kmp_user_lock_p lck;
2498 
2499  if ((__kmp_user_lock_kind == lk_tas) &&
2500  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2501  OMP_NEST_LOCK_T_SIZE)) {
2502  lck = (kmp_user_lock_p)user_lock;
2503  }
2504 #if KMP_USE_FUTEX
2505  else if ((__kmp_user_lock_kind == lk_futex) &&
2506  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2507  OMP_NEST_LOCK_T_SIZE)) {
2508  lck = (kmp_user_lock_p)user_lock;
2509  }
2510 #endif
2511  else {
2512  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2513  }
2514 
2515 #if OMPT_SUPPORT && OMPT_OPTIONAL
2516  // This is the case, if called from omp_init_lock_with_hint:
2517  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2518  if (!codeptr)
2519  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2520  if (ompt_enabled.ompt_callback_lock_destroy) {
2521  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2522  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2523  }
2524 #endif
2525 
2526 #if USE_ITT_BUILD
2527  __kmp_itt_lock_destroyed(lck);
2528 #endif /* USE_ITT_BUILD */
2529 
2530  DESTROY_NESTED_LOCK(lck);
2531 
2532  if ((__kmp_user_lock_kind == lk_tas) &&
2533  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2534  OMP_NEST_LOCK_T_SIZE)) {
2535  ;
2536  }
2537 #if KMP_USE_FUTEX
2538  else if ((__kmp_user_lock_kind == lk_futex) &&
2539  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2540  OMP_NEST_LOCK_T_SIZE)) {
2541  ;
2542  }
2543 #endif
2544  else {
2545  __kmp_user_lock_free(user_lock, gtid, lck);
2546  }
2547 #endif // KMP_USE_DYNAMIC_LOCK
2548 } // __kmpc_destroy_nest_lock
2549 
2550 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2551  KMP_COUNT_BLOCK(OMP_set_lock);
2552 #if KMP_USE_DYNAMIC_LOCK
2553  int tag = KMP_EXTRACT_D_TAG(user_lock);
2554 #if USE_ITT_BUILD
2555  __kmp_itt_lock_acquiring(
2556  (kmp_user_lock_p)
2557  user_lock); // itt function will get to the right lock object.
2558 #endif
2559 #if OMPT_SUPPORT && OMPT_OPTIONAL
2560  // This is the case, if called from omp_init_lock_with_hint:
2561  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2562  if (!codeptr)
2563  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2564  if (ompt_enabled.ompt_callback_mutex_acquire) {
2565  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2566  ompt_mutex_lock, omp_lock_hint_none,
2567  __ompt_get_mutex_impl_type(user_lock),
2568  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2569  }
2570 #endif
2571 #if KMP_USE_INLINED_TAS
2572  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2573  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2574  } else
2575 #elif KMP_USE_INLINED_FUTEX
2576  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2577  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2578  } else
2579 #endif
2580  {
2581  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2582  }
2583 #if USE_ITT_BUILD
2584  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2585 #endif
2586 #if OMPT_SUPPORT && OMPT_OPTIONAL
2587  if (ompt_enabled.ompt_callback_mutex_acquired) {
2588  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2589  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2590  }
2591 #endif
2592 
2593 #else // KMP_USE_DYNAMIC_LOCK
2594 
2595  kmp_user_lock_p lck;
2596 
2597  if ((__kmp_user_lock_kind == lk_tas) &&
2598  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2599  lck = (kmp_user_lock_p)user_lock;
2600  }
2601 #if KMP_USE_FUTEX
2602  else if ((__kmp_user_lock_kind == lk_futex) &&
2603  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2604  lck = (kmp_user_lock_p)user_lock;
2605  }
2606 #endif
2607  else {
2608  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2609  }
2610 
2611 #if USE_ITT_BUILD
2612  __kmp_itt_lock_acquiring(lck);
2613 #endif /* USE_ITT_BUILD */
2614 #if OMPT_SUPPORT && OMPT_OPTIONAL
2615  // This is the case, if called from omp_init_lock_with_hint:
2616  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2617  if (!codeptr)
2618  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2619  if (ompt_enabled.ompt_callback_mutex_acquire) {
2620  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2621  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2622  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2623  }
2624 #endif
2625 
2626  ACQUIRE_LOCK(lck, gtid);
2627 
2628 #if USE_ITT_BUILD
2629  __kmp_itt_lock_acquired(lck);
2630 #endif /* USE_ITT_BUILD */
2631 
2632 #if OMPT_SUPPORT && OMPT_OPTIONAL
2633  if (ompt_enabled.ompt_callback_mutex_acquired) {
2634  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2635  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2636  }
2637 #endif
2638 
2639 #endif // KMP_USE_DYNAMIC_LOCK
2640 }
2641 
2642 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2643 #if KMP_USE_DYNAMIC_LOCK
2644 
2645 #if USE_ITT_BUILD
2646  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2647 #endif
2648 #if OMPT_SUPPORT && OMPT_OPTIONAL
2649  // This is the case, if called from omp_init_lock_with_hint:
2650  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2651  if (!codeptr)
2652  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2653  if (ompt_enabled.enabled) {
2654  if (ompt_enabled.ompt_callback_mutex_acquire) {
2655  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2656  ompt_mutex_nest_lock, omp_lock_hint_none,
2657  __ompt_get_mutex_impl_type(user_lock),
2658  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2659  }
2660  }
2661 #endif
2662  int acquire_status =
2663  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2664  (void) acquire_status;
2665 #if USE_ITT_BUILD
2666  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2667 #endif
2668 
2669 #if OMPT_SUPPORT && OMPT_OPTIONAL
2670  if (ompt_enabled.enabled) {
2671  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2672  if (ompt_enabled.ompt_callback_mutex_acquired) {
2673  // lock_first
2674  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2675  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2676  codeptr);
2677  }
2678  } else {
2679  if (ompt_enabled.ompt_callback_nest_lock) {
2680  // lock_next
2681  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2682  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2683  }
2684  }
2685  }
2686 #endif
2687 
2688 #else // KMP_USE_DYNAMIC_LOCK
2689  int acquire_status;
2690  kmp_user_lock_p lck;
2691 
2692  if ((__kmp_user_lock_kind == lk_tas) &&
2693  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2694  OMP_NEST_LOCK_T_SIZE)) {
2695  lck = (kmp_user_lock_p)user_lock;
2696  }
2697 #if KMP_USE_FUTEX
2698  else if ((__kmp_user_lock_kind == lk_futex) &&
2699  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2700  OMP_NEST_LOCK_T_SIZE)) {
2701  lck = (kmp_user_lock_p)user_lock;
2702  }
2703 #endif
2704  else {
2705  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2706  }
2707 
2708 #if USE_ITT_BUILD
2709  __kmp_itt_lock_acquiring(lck);
2710 #endif /* USE_ITT_BUILD */
2711 #if OMPT_SUPPORT && OMPT_OPTIONAL
2712  // This is the case, if called from omp_init_lock_with_hint:
2713  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2714  if (!codeptr)
2715  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2716  if (ompt_enabled.enabled) {
2717  if (ompt_enabled.ompt_callback_mutex_acquire) {
2718  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2719  ompt_mutex_nest_lock, omp_lock_hint_none,
2720  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2721  codeptr);
2722  }
2723  }
2724 #endif
2725 
2726  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2727 
2728 #if USE_ITT_BUILD
2729  __kmp_itt_lock_acquired(lck);
2730 #endif /* USE_ITT_BUILD */
2731 
2732 #if OMPT_SUPPORT && OMPT_OPTIONAL
2733  if (ompt_enabled.enabled) {
2734  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2735  if (ompt_enabled.ompt_callback_mutex_acquired) {
2736  // lock_first
2737  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2738  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2739  }
2740  } else {
2741  if (ompt_enabled.ompt_callback_nest_lock) {
2742  // lock_next
2743  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2744  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2745  }
2746  }
2747  }
2748 #endif
2749 
2750 #endif // KMP_USE_DYNAMIC_LOCK
2751 }
2752 
2753 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2754 #if KMP_USE_DYNAMIC_LOCK
2755 
2756  int tag = KMP_EXTRACT_D_TAG(user_lock);
2757 #if USE_ITT_BUILD
2758  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2759 #endif
2760 #if KMP_USE_INLINED_TAS
2761  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2762  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2763  } else
2764 #elif KMP_USE_INLINED_FUTEX
2765  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2766  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2767  } else
2768 #endif
2769  {
2770  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2771  }
2772 
2773 #if OMPT_SUPPORT && OMPT_OPTIONAL
2774  // This is the case, if called from omp_init_lock_with_hint:
2775  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2776  if (!codeptr)
2777  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2778  if (ompt_enabled.ompt_callback_mutex_released) {
2779  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2780  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2781  }
2782 #endif
2783 
2784 #else // KMP_USE_DYNAMIC_LOCK
2785 
2786  kmp_user_lock_p lck;
2787 
2788  /* Can't use serial interval since not block structured */
2789  /* release the lock */
2790 
2791  if ((__kmp_user_lock_kind == lk_tas) &&
2792  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2793 #if KMP_OS_LINUX && \
2794  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2795 // "fast" path implemented to fix customer performance issue
2796 #if USE_ITT_BUILD
2797  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2798 #endif /* USE_ITT_BUILD */
2799  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2800  KMP_MB();
2801 
2802 #if OMPT_SUPPORT && OMPT_OPTIONAL
2803  // This is the case, if called from omp_init_lock_with_hint:
2804  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2805  if (!codeptr)
2806  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2807  if (ompt_enabled.ompt_callback_mutex_released) {
2808  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2809  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2810  }
2811 #endif
2812 
2813  return;
2814 #else
2815  lck = (kmp_user_lock_p)user_lock;
2816 #endif
2817  }
2818 #if KMP_USE_FUTEX
2819  else if ((__kmp_user_lock_kind == lk_futex) &&
2820  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2821  lck = (kmp_user_lock_p)user_lock;
2822  }
2823 #endif
2824  else {
2825  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2826  }
2827 
2828 #if USE_ITT_BUILD
2829  __kmp_itt_lock_releasing(lck);
2830 #endif /* USE_ITT_BUILD */
2831 
2832  RELEASE_LOCK(lck, gtid);
2833 
2834 #if OMPT_SUPPORT && OMPT_OPTIONAL
2835  // This is the case, if called from omp_init_lock_with_hint:
2836  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2837  if (!codeptr)
2838  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2839  if (ompt_enabled.ompt_callback_mutex_released) {
2840  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2841  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2842  }
2843 #endif
2844 
2845 #endif // KMP_USE_DYNAMIC_LOCK
2846 }
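
// Illustrative user-level usage of the plain lock set/unset entry points
// above (a minimal sketch; the omp_set_lock/omp_unset_lock wrappers are
// assumed to forward here with the caller's gtid):
//
//   #include <omp.h>
//
//   static omp_lock_t lk;
//   static long counter;
//
//   void bump(void) {
//     omp_set_lock(&lk);   // blocks until the lock is acquired
//     ++counter;           // protected update
//     omp_unset_lock(&lk); // releases the lock, may wake a waiter
//   }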
2847 
2848 /* release the lock */
2849 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2850 #if KMP_USE_DYNAMIC_LOCK
2851 
2852 #if USE_ITT_BUILD
2853  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2854 #endif
2855  int release_status =
2856  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2857  (void) release_status;
2858 
2859 #if OMPT_SUPPORT && OMPT_OPTIONAL
2860  // This is the case, if called from omp_init_lock_with_hint:
2861  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2862  if (!codeptr)
2863  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2864  if (ompt_enabled.enabled) {
2865  if (release_status == KMP_LOCK_RELEASED) {
2866  if (ompt_enabled.ompt_callback_mutex_released) {
2867  // release_lock_last
2868  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2869  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2870  codeptr);
2871  }
2872  } else if (ompt_enabled.ompt_callback_nest_lock) {
2873  // release_lock_prev
2874  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2875  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2876  }
2877  }
2878 #endif
2879 
2880 #else // KMP_USE_DYNAMIC_LOCK
2881 
2882  kmp_user_lock_p lck;
2883 
2884  /* Can't use serial interval since not block structured */
2885 
2886  if ((__kmp_user_lock_kind == lk_tas) &&
2887  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2888  OMP_NEST_LOCK_T_SIZE)) {
2889 #if KMP_OS_LINUX && \
2890  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2891  // "fast" path implemented to fix customer performance issue
2892  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2893 #if USE_ITT_BUILD
2894  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2895 #endif /* USE_ITT_BUILD */
2896 
2897 #if OMPT_SUPPORT && OMPT_OPTIONAL
2898  int release_status = KMP_LOCK_STILL_HELD;
2899 #endif
2900 
2901  if (--(tl->lk.depth_locked) == 0) {
2902  TCW_4(tl->lk.poll, 0);
2903 #if OMPT_SUPPORT && OMPT_OPTIONAL
2904  release_status = KMP_LOCK_RELEASED;
2905 #endif
2906  }
2907  KMP_MB();
2908 
2909 #if OMPT_SUPPORT && OMPT_OPTIONAL
2910  // This is the case, if called from omp_init_lock_with_hint:
2911  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2912  if (!codeptr)
2913  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2914  if (ompt_enabled.enabled) {
2915  if (release_status == KMP_LOCK_RELEASED) {
2916  if (ompt_enabled.ompt_callback_mutex_released) {
2917  // release_lock_last
2918  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2919  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2920  }
2921  } else if (ompt_enabled.ompt_callback_nest_lock) {
2922  // release_lock_previous
2923  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2924  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2925  }
2926  }
2927 #endif
2928 
2929  return;
2930 #else
2931  lck = (kmp_user_lock_p)user_lock;
2932 #endif
2933  }
2934 #if KMP_USE_FUTEX
2935  else if ((__kmp_user_lock_kind == lk_futex) &&
2936  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2937  OMP_NEST_LOCK_T_SIZE)) {
2938  lck = (kmp_user_lock_p)user_lock;
2939  }
2940 #endif
2941  else {
2942  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2943  }
2944 
2945 #if USE_ITT_BUILD
2946  __kmp_itt_lock_releasing(lck);
2947 #endif /* USE_ITT_BUILD */
2948 
2949  int release_status;
2950  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2951 #if OMPT_SUPPORT && OMPT_OPTIONAL
2952  // This is the case, if called from omp_init_lock_with_hint:
2953  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2954  if (!codeptr)
2955  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2956  if (ompt_enabled.enabled) {
2957  if (release_status == KMP_LOCK_RELEASED) {
2958  if (ompt_enabled.ompt_callback_mutex_released) {
2959  // release_lock_last
2960  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2961  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2962  }
2963  } else if (ompt_enabled.ompt_callback_nest_lock) {
2964  // release_lock_previous
2965  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2966  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2967  }
2968  }
2969 #endif
2970 
2971 #endif // KMP_USE_DYNAMIC_LOCK
2972 }
2973 
2974 /* try to acquire the lock */
2975 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2976  KMP_COUNT_BLOCK(OMP_test_lock);
2977 
2978 #if KMP_USE_DYNAMIC_LOCK
2979  int rc;
2980  int tag = KMP_EXTRACT_D_TAG(user_lock);
2981 #if USE_ITT_BUILD
2982  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2983 #endif
2984 #if OMPT_SUPPORT && OMPT_OPTIONAL
2985  // This is the case, if called from omp_init_lock_with_hint:
2986  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2987  if (!codeptr)
2988  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2989  if (ompt_enabled.ompt_callback_mutex_acquire) {
2990  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2991  ompt_mutex_lock, omp_lock_hint_none,
2992  __ompt_get_mutex_impl_type(user_lock),
2993  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2994  }
2995 #endif
2996 #if KMP_USE_INLINED_TAS
2997  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2998  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2999  } else
3000 #elif KMP_USE_INLINED_FUTEX
3001  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3002  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3003  } else
3004 #endif
3005  {
3006  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3007  }
3008  if (rc) {
3009 #if USE_ITT_BUILD
3010  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3011 #endif
3012 #if OMPT_SUPPORT && OMPT_OPTIONAL
3013  if (ompt_enabled.ompt_callback_mutex_acquired) {
3014  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3015  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3016  }
3017 #endif
3018  return FTN_TRUE;
3019  } else {
3020 #if USE_ITT_BUILD
3021  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3022 #endif
3023  return FTN_FALSE;
3024  }
3025 
3026 #else // KMP_USE_DYNAMIC_LOCK
3027 
3028  kmp_user_lock_p lck;
3029  int rc;
3030 
3031  if ((__kmp_user_lock_kind == lk_tas) &&
3032  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3033  lck = (kmp_user_lock_p)user_lock;
3034  }
3035 #if KMP_USE_FUTEX
3036  else if ((__kmp_user_lock_kind == lk_futex) &&
3037  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3038  lck = (kmp_user_lock_p)user_lock;
3039  }
3040 #endif
3041  else {
3042  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3043  }
3044 
3045 #if USE_ITT_BUILD
3046  __kmp_itt_lock_acquiring(lck);
3047 #endif /* USE_ITT_BUILD */
3048 #if OMPT_SUPPORT && OMPT_OPTIONAL
3049  // This is the case, if called from omp_init_lock_with_hint:
3050  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3051  if (!codeptr)
3052  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3053  if (ompt_enabled.ompt_callback_mutex_acquire) {
3054  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3055  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3056  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3057  }
3058 #endif
3059 
3060  rc = TEST_LOCK(lck, gtid);
3061 #if USE_ITT_BUILD
3062  if (rc) {
3063  __kmp_itt_lock_acquired(lck);
3064  } else {
3065  __kmp_itt_lock_cancelled(lck);
3066  }
3067 #endif /* USE_ITT_BUILD */
3068 #if OMPT_SUPPORT && OMPT_OPTIONAL
3069  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3070  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3071  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3072  }
3073 #endif
3074 
3075  return (rc ? FTN_TRUE : FTN_FALSE);
3076 
3077 /* Can't use serial interval since not block structured */
3078 
3079 #endif // KMP_USE_DYNAMIC_LOCK
3080 }
3081 
3082 /* try to acquire the lock */
3083 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3084 #if KMP_USE_DYNAMIC_LOCK
3085  int rc;
3086 #if USE_ITT_BUILD
3087  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3088 #endif
3089 #if OMPT_SUPPORT && OMPT_OPTIONAL
3090  // This is the case, if called from omp_init_lock_with_hint:
3091  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3092  if (!codeptr)
3093  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3094  if (ompt_enabled.ompt_callback_mutex_acquire) {
3095  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3096  ompt_mutex_nest_lock, omp_lock_hint_none,
3097  __ompt_get_mutex_impl_type(user_lock),
3098  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3099  }
3100 #endif
3101  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3102 #if USE_ITT_BUILD
3103  if (rc) {
3104  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3105  } else {
3106  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3107  }
3108 #endif
3109 #if OMPT_SUPPORT && OMPT_OPTIONAL
3110  if (ompt_enabled.enabled && rc) {
3111  if (rc == 1) {
3112  if (ompt_enabled.ompt_callback_mutex_acquired) {
3113  // lock_first
3114  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3115  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3116  codeptr);
3117  }
3118  } else {
3119  if (ompt_enabled.ompt_callback_nest_lock) {
3120  // lock_next
3121  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3122  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3123  }
3124  }
3125  }
3126 #endif
3127  return rc;
3128 
3129 #else // KMP_USE_DYNAMIC_LOCK
3130 
3131  kmp_user_lock_p lck;
3132  int rc;
3133 
3134  if ((__kmp_user_lock_kind == lk_tas) &&
3135  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3136  OMP_NEST_LOCK_T_SIZE)) {
3137  lck = (kmp_user_lock_p)user_lock;
3138  }
3139 #if KMP_USE_FUTEX
3140  else if ((__kmp_user_lock_kind == lk_futex) &&
3141  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3142  OMP_NEST_LOCK_T_SIZE)) {
3143  lck = (kmp_user_lock_p)user_lock;
3144  }
3145 #endif
3146  else {
3147  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3148  }
3149 
3150 #if USE_ITT_BUILD
3151  __kmp_itt_lock_acquiring(lck);
3152 #endif /* USE_ITT_BUILD */
3153 
3154 #if OMPT_SUPPORT && OMPT_OPTIONAL
3155  // This is the case, if called from omp_init_lock_with_hint:
3156  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3157  if (!codeptr)
3158  codeptr = OMPT_GET_RETURN_ADDRESS(0);
 3159  if (ompt_enabled.enabled &&
 3160      ompt_enabled.ompt_callback_mutex_acquire) {
3161  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3162  ompt_mutex_nest_lock, omp_lock_hint_none,
3163  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3164  codeptr);
3165  }
3166 #endif
3167 
3168  rc = TEST_NESTED_LOCK(lck, gtid);
3169 #if USE_ITT_BUILD
3170  if (rc) {
3171  __kmp_itt_lock_acquired(lck);
3172  } else {
3173  __kmp_itt_lock_cancelled(lck);
3174  }
3175 #endif /* USE_ITT_BUILD */
3176 #if OMPT_SUPPORT && OMPT_OPTIONAL
3177  if (ompt_enabled.enabled && rc) {
3178  if (rc == 1) {
3179  if (ompt_enabled.ompt_callback_mutex_acquired) {
3180  // lock_first
3181  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3182  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3183  }
3184  } else {
3185  if (ompt_enabled.ompt_callback_nest_lock) {
3186  // lock_next
3187  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 3188  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3189  }
3190  }
3191  }
3192 #endif
3193  return rc;
3194 
3195 /* Can't use serial interval since not block structured */
3196 
3197 #endif // KMP_USE_DYNAMIC_LOCK
3198 }
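
// Illustrative user-level usage of the nestable test entry point above; a
// successful omp_test_nest_lock() returns the new nesting count rather than
// just TRUE (a minimal sketch):
//
//   #include <omp.h>
//
//   static omp_nest_lock_t nlk;
//
//   int try_twice(void) {
//     int depth = 0;
//     if (omp_test_nest_lock(&nlk)) {     // 1 on first acquisition
//       depth = omp_test_nest_lock(&nlk); // same owner again: returns 2
//       omp_unset_nest_lock(&nlk);
//       omp_unset_nest_lock(&nlk);
//     }
//     return depth; // 2 if both acquisitions succeeded, 0 if the lock was busy
//   }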
3199 
 3200 // Interface to the fast, scalable reduction method routines
 3201 
 3202 // Keep the selected method in a thread-local structure for cross-function
 3203 // use: it will be read back in the __kmpc_end_reduce* functions.
 3204 // An alternative would be to re-determine the method one more time in the
 3205 // __kmpc_end_reduce* functions (a new prototype would be required then).
 3206 // AT: which solution is better?
3207 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3208  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3209 
3210 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3211  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3212 
3213 // description of the packed_reduction_method variable: look at the macros in
3214 // kmp.h
3215 
3216 // used in a critical section reduce block
3217 static __forceinline void
3218 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3219  kmp_critical_name *crit) {
3220 
 3221  // this lock was visible to a customer and to the threading profile tool as a
 3222  // serial overhead span (although it is used for an internal purpose only)
 3223  // why was it visible in the previous implementation?
 3224  // should we keep it visible in the new reduce block?
3225  kmp_user_lock_p lck;
3226 
3227 #if KMP_USE_DYNAMIC_LOCK
3228 
3229  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3230  // Check if it is initialized.
3231  if (*lk == 0) {
3232  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3233  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3234  KMP_GET_D_TAG(__kmp_user_lock_seq));
3235  } else {
3236  __kmp_init_indirect_csptr(crit, loc, global_tid,
3237  KMP_GET_I_TAG(__kmp_user_lock_seq));
3238  }
3239  }
3240  // Branch for accessing the actual lock object and set operation. This
3241  // branching is inevitable since this lock initialization does not follow the
3242  // normal dispatch path (lock table is not used).
3243  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3244  lck = (kmp_user_lock_p)lk;
3245  KMP_DEBUG_ASSERT(lck != NULL);
3246  if (__kmp_env_consistency_check) {
3247  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3248  }
3249  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3250  } else {
3251  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3252  lck = ilk->lock;
3253  KMP_DEBUG_ASSERT(lck != NULL);
3254  if (__kmp_env_consistency_check) {
3255  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3256  }
3257  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3258  }
3259 
3260 #else // KMP_USE_DYNAMIC_LOCK
3261 
3262  // We know that the fast reduction code is only emitted by Intel compilers
3263  // with 32 byte critical sections. If there isn't enough space, then we
3264  // have to use a pointer.
3265  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3266  lck = (kmp_user_lock_p)crit;
3267  } else {
3268  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3269  }
3270  KMP_DEBUG_ASSERT(lck != NULL);
3271 
3272  if (__kmp_env_consistency_check)
3273  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3274 
3275  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3276 
3277 #endif // KMP_USE_DYNAMIC_LOCK
3278 }
3279 
3280 // used in a critical section reduce block
3281 static __forceinline void
3282 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3283  kmp_critical_name *crit) {
3284 
3285  kmp_user_lock_p lck;
3286 
3287 #if KMP_USE_DYNAMIC_LOCK
3288 
3289  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3290  lck = (kmp_user_lock_p)crit;
3291  if (__kmp_env_consistency_check)
3292  __kmp_pop_sync(global_tid, ct_critical, loc);
3293  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3294  } else {
3295  kmp_indirect_lock_t *ilk =
3296  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3297  if (__kmp_env_consistency_check)
3298  __kmp_pop_sync(global_tid, ct_critical, loc);
3299  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3300  }
3301 
3302 #else // KMP_USE_DYNAMIC_LOCK
3303 
3304  // We know that the fast reduction code is only emitted by Intel compilers
3305  // with 32 byte critical sections. If there isn't enough space, then we have
3306  // to use a pointer.
3307  if (__kmp_base_user_lock_size > 32) {
3308  lck = *((kmp_user_lock_p *)crit);
3309  KMP_ASSERT(lck != NULL);
3310  } else {
3311  lck = (kmp_user_lock_p)crit;
3312  }
3313 
3314  if (__kmp_env_consistency_check)
3315  __kmp_pop_sync(global_tid, ct_critical, loc);
3316 
3317  __kmp_release_user_lock_with_checks(lck, global_tid);
3318 
3319 #endif // KMP_USE_DYNAMIC_LOCK
3320 } // __kmp_end_critical_section_reduce_block
3321 
3322 static __forceinline int
3323 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3324  int *task_state) {
3325  kmp_team_t *team;
3326 
 3327  // Check whether we are inside a teams construct.
3328  if (th->th.th_teams_microtask) {
3329  *team_p = team = th->th.th_team;
3330  if (team->t.t_level == th->th.th_teams_level) {
3331  // This is reduction at teams construct.
3332  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3333  // Let's swap teams temporarily for the reduction.
3334  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3335  th->th.th_team = team->t.t_parent;
3336  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3337  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3338  *task_state = th->th.th_task_state;
3339  th->th.th_task_state = 0;
3340 
3341  return 1;
3342  }
3343  }
3344  return 0;
3345 }
3346 
3347 static __forceinline void
3348 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3349  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3350  th->th.th_info.ds.ds_tid = 0;
3351  th->th.th_team = team;
3352  th->th.th_team_nproc = team->t.t_nproc;
3353  th->th.th_task_team = team->t.t_task_team[task_state];
3354  th->th.th_task_state = task_state;
3355 }
3356 
3357 /* 2.a.i. Reduce Block without a terminating barrier */
3373 kmp_int32
3374 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3375  size_t reduce_size, void *reduce_data,
3376  void (*reduce_func)(void *lhs_data, void *rhs_data),
3377  kmp_critical_name *lck) {
3378 
3379  KMP_COUNT_BLOCK(REDUCE_nowait);
3380  int retval = 0;
3381  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3382  kmp_info_t *th;
3383  kmp_team_t *team;
3384  int teams_swapped = 0, task_state;
3385  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3386 
3387  // why do we need this initialization here at all?
 3388  // Reduction clause cannot be used as a stand-alone directive.
3389 
3390  // do not call __kmp_serial_initialize(), it will be called by
3391  // __kmp_parallel_initialize() if needed
3392  // possible detection of false-positive race by the threadchecker ???
3393  if (!TCR_4(__kmp_init_parallel))
3394  __kmp_parallel_initialize();
3395 
3396  __kmp_resume_if_soft_paused();
3397 
3398 // check correctness of reduce block nesting
3399 #if KMP_USE_DYNAMIC_LOCK
3400  if (__kmp_env_consistency_check)
3401  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3402 #else
3403  if (__kmp_env_consistency_check)
3404  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3405 #endif
3406 
3407  th = __kmp_thread_from_gtid(global_tid);
3408  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3409 
 3410  // The packed_reduction_method value will be reused by the __kmpc_end_reduce*
 3411  // functions, so it must be kept in a variable. The variable should be a
 3412  // construct-specific or thread-specific property, not a team-specific
 3413  // property: a thread can reach the next reduce block on the next construct,
 3414  // and the reduction method may differ there.
 3415  // The ident_t "loc" parameter could be used as a construct-specific property
 3416  // (but what if loc == 0?), and if a shared construct-specific or
 3417  // team-specific variable were used, unnecessary extra synchronization would
 3418  // be needed.
 3419  // A thread-specific variable is better with respect to both issues above
 3420  // (next construct and extra syncs), so the thread-specific
 3421  // "th_local.packed_reduction_method" variable is used.
 3422  // Each thread executes the 'determine' and 'set' lines itself; there is no
 3423  // need to do this on a single thread only, which would just require
 3424  // unnecessary extra synchronization.
3425 
3426  packed_reduction_method = __kmp_determine_reduction_method(
3427  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3428  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3429 
3430  if (packed_reduction_method == critical_reduce_block) {
3431 
3432  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3433  retval = 1;
3434 
3435  } else if (packed_reduction_method == empty_reduce_block) {
3436 
 3437  // usage: if team size == 1, no synchronization is required (Intel
 3438  // platforms only)
3439  retval = 1;
3440 
3441  } else if (packed_reduction_method == atomic_reduce_block) {
3442 
3443  retval = 2;
3444 
 3445  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
 3446  // won't be called by the code gen)
 3447  // (this is not quite correct: the checking block has been closed by
 3448  // this 'pop',
 3449  // but the atomic operation has not been executed yet; it will be executed
 3450  // slightly later, literally on the next instruction)
3451  if (__kmp_env_consistency_check)
3452  __kmp_pop_sync(global_tid, ct_reduce, loc);
3453 
3454  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3455  tree_reduce_block)) {
3456 
 3457 // AT: performance issue: a real barrier here
 3458 // AT: (if the master is slow, other threads are blocked here waiting for the
 3459 // master to come and release them)
 3460 // AT: (this is not what a customer might expect when specifying the NOWAIT clause)
 3461 // AT: (specifying NOWAIT won't result in a performance improvement; it will
 3462 // only be confusing to a customer)
 3463 // AT: another implementation of *barrier_gather*nowait() (or some other design)
 3464 // might be faster and more in line with the sense of NOWAIT
 3465 // AT: TO DO: run the EPCC benchmarks and compare times
3466 
3467 // this barrier should be invisible to a customer and to the threading profile
3468 // tool (it's neither a terminating barrier nor customer's code, it's
3469 // used for an internal purpose)
3470 #if OMPT_SUPPORT
 3471  // JP: can this barrier potentially lead to task scheduling?
 3472  // JP: as long as there is a barrier in the implementation, OMPT should and
 3473  // will provide the barrier events,
 3474  // so we set up the necessary frame/return addresses.
3475  ompt_frame_t *ompt_frame;
3476  if (ompt_enabled.enabled) {
3477  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3478  if (ompt_frame->enter_frame.ptr == NULL)
3479  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3480  OMPT_STORE_RETURN_ADDRESS(global_tid);
3481  }
3482 #endif
3483 #if USE_ITT_NOTIFY
3484  __kmp_threads[global_tid]->th.th_ident = loc;
3485 #endif
3486  retval =
3487  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3488  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3489  retval = (retval != 0) ? (0) : (1);
3490 #if OMPT_SUPPORT && OMPT_OPTIONAL
3491  if (ompt_enabled.enabled) {
3492  ompt_frame->enter_frame = ompt_data_none;
3493  }
3494 #endif
3495 
 3496  // all other workers except the master should do this pop here
 3497  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3498  if (__kmp_env_consistency_check) {
3499  if (retval == 0) {
3500  __kmp_pop_sync(global_tid, ct_reduce, loc);
3501  }
3502  }
3503 
3504  } else {
3505 
3506  // should never reach this block
3507  KMP_ASSERT(0); // "unexpected method"
3508  }
3509  if (teams_swapped) {
3510  __kmp_restore_swapped_teams(th, team, task_state);
3511  }
3512  KA_TRACE(
3513  10,
3514  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3515  global_tid, packed_reduction_method, retval));
3516 
3517  return retval;
3518 }
3519 
3528 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3529  kmp_critical_name *lck) {
3530 
3531  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3532 
3533  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3534 
3535  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3536 
3537  if (packed_reduction_method == critical_reduce_block) {
3538 
3539  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3540 
3541  } else if (packed_reduction_method == empty_reduce_block) {
3542 
 3543  // usage: if team size == 1, no synchronization is required (on Intel
 3544  // platforms only)
3545 
3546  } else if (packed_reduction_method == atomic_reduce_block) {
3547 
 3548  // neither the master nor the other workers should get here
 3549  // (code gen does not generate this call in case 2: atomic reduce block)
 3550  // actually it would be better to remove this else-if entirely;
 3551  // after its removal this value would be checked by the 'else' and would assert
3552 
3553  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3554  tree_reduce_block)) {
3555 
3556  // only master gets here
3557 
3558  } else {
3559 
3560  // should never reach this block
3561  KMP_ASSERT(0); // "unexpected method"
3562  }
3563 
3564  if (__kmp_env_consistency_check)
3565  __kmp_pop_sync(global_tid, ct_reduce, loc);
3566 
3567  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3568  global_tid, packed_reduction_method));
3569 
3570  return;
3571 }
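
// A hedged sketch of how a compiler might drive __kmpc_reduce_nowait() and
// __kmpc_end_reduce_nowait() above for
//   #pragma omp for reduction(+:sum) nowait
// The names are illustrative and real code generation differs by compiler;
// the atomic case is shown with a pragma for brevity, whereas compilers emit
// the atomic update directly.
//
//   static void red_add(void *lhs, void *rhs) {
//     *(double *)lhs += *(double *)rhs;
//   }
//
//   static void combine_partial(ident_t *loc, kmp_int32 gtid, double *sum,
//                               double local_sum, kmp_critical_name *crit) {
//     switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
//                                  &local_sum, red_add, crit)) {
//     case 1: // critical or tree method: combine, then close the region
//       *sum += local_sum;
//       __kmpc_end_reduce_nowait(loc, gtid, crit);
//       break;
//     case 2: { // atomic method: no __kmpc_end_reduce_nowait() call is emitted
//   #pragma omp atomic
//       *sum += local_sum;
//     } break;
//     default: // 0: tree method, non-master thread; nothing more to do
//       break;
//     }
//   }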
3572 
3573 /* 2.a.ii. Reduce Block with a terminating barrier */
3574 
3590 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3591  size_t reduce_size, void *reduce_data,
3592  void (*reduce_func)(void *lhs_data, void *rhs_data),
3593  kmp_critical_name *lck) {
3594  KMP_COUNT_BLOCK(REDUCE_wait);
3595  int retval = 0;
3596  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3597  kmp_info_t *th;
3598  kmp_team_t *team;
3599  int teams_swapped = 0, task_state;
3600 
3601  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3602 
3603  // why do we need this initialization here at all?
 3604  // Reduction clause cannot be a stand-alone directive.
3605 
3606  // do not call __kmp_serial_initialize(), it will be called by
3607  // __kmp_parallel_initialize() if needed
3608  // possible detection of false-positive race by the threadchecker ???
3609  if (!TCR_4(__kmp_init_parallel))
3610  __kmp_parallel_initialize();
3611 
3612  __kmp_resume_if_soft_paused();
3613 
3614 // check correctness of reduce block nesting
3615 #if KMP_USE_DYNAMIC_LOCK
3616  if (__kmp_env_consistency_check)
3617  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3618 #else
3619  if (__kmp_env_consistency_check)
3620  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3621 #endif
3622 
3623  th = __kmp_thread_from_gtid(global_tid);
3624  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3625 
3626  packed_reduction_method = __kmp_determine_reduction_method(
3627  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3628  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3629 
3630  if (packed_reduction_method == critical_reduce_block) {
3631 
3632  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3633  retval = 1;
3634 
3635  } else if (packed_reduction_method == empty_reduce_block) {
3636 
 3637  // usage: if team size == 1, no synchronization is required (Intel
 3638  // platforms only)
3639  retval = 1;
3640 
3641  } else if (packed_reduction_method == atomic_reduce_block) {
3642 
3643  retval = 2;
3644 
3645  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3646  tree_reduce_block)) {
3647 
3648 // case tree_reduce_block:
3649 // this barrier should be visible to a customer and to the threading profile
3650 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3651 #if OMPT_SUPPORT
3652  ompt_frame_t *ompt_frame;
3653  if (ompt_enabled.enabled) {
3654  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3655  if (ompt_frame->enter_frame.ptr == NULL)
3656  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3657  OMPT_STORE_RETURN_ADDRESS(global_tid);
3658  }
3659 #endif
3660 #if USE_ITT_NOTIFY
3661  __kmp_threads[global_tid]->th.th_ident =
3662  loc; // needed for correct notification of frames
3663 #endif
3664  retval =
3665  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3666  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3667  retval = (retval != 0) ? (0) : (1);
3668 #if OMPT_SUPPORT && OMPT_OPTIONAL
3669  if (ompt_enabled.enabled) {
3670  ompt_frame->enter_frame = ompt_data_none;
3671  }
3672 #endif
3673 
3674  // all workers except the master should do this pop here
3675  // (no worker other than the master will enter __kmpc_end_reduce())
3676  if (__kmp_env_consistency_check) {
3677  if (retval == 0) { // 0: all other workers; 1: master
3678  __kmp_pop_sync(global_tid, ct_reduce, loc);
3679  }
3680  }
3681 
3682  } else {
3683 
3684  // should never reach this block
3685  KMP_ASSERT(0); // "unexpected method"
3686  }
3687  if (teams_swapped) {
3688  __kmp_restore_swapped_teams(th, team, task_state);
3689  }
3690 
3691  KA_TRACE(10,
3692  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3693  global_tid, packed_reduction_method, retval));
3694  return retval;
3695 }
3696 
3707 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3708  kmp_critical_name *lck) {
3709 
3710  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3711  kmp_info_t *th;
3712  kmp_team_t *team;
3713  int teams_swapped = 0, task_state;
3714 
3715  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3716 
3717  th = __kmp_thread_from_gtid(global_tid);
3718  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3719 
3720  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3721 
3722  // this barrier should be visible to the user and to the threading profile
3723  // tool (a terminating barrier on the construct when NOWAIT is not specified)
3724 
3725  if (packed_reduction_method == critical_reduce_block) {
3726  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3727 
3728 // TODO: implicit barrier: should be exposed
3729 #if OMPT_SUPPORT
3730  ompt_frame_t *ompt_frame;
3731  if (ompt_enabled.enabled) {
3732  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3733  if (ompt_frame->enter_frame.ptr == NULL)
3734  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3735  OMPT_STORE_RETURN_ADDRESS(global_tid);
3736  }
3737 #endif
3738 #if USE_ITT_NOTIFY
3739  __kmp_threads[global_tid]->th.th_ident = loc;
3740 #endif
3741  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3742 #if OMPT_SUPPORT && OMPT_OPTIONAL
3743  if (ompt_enabled.enabled) {
3744  ompt_frame->enter_frame = ompt_data_none;
3745  }
3746 #endif
3747 
3748  } else if (packed_reduction_method == empty_reduce_block) {
3749 
3750 // usage: if team size == 1, no synchronization is required (Intel platforms only)
3751 
3752 // TODO: implicit barrier: should be exposed
3753 #if OMPT_SUPPORT
3754  ompt_frame_t *ompt_frame;
3755  if (ompt_enabled.enabled) {
3756  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3757  if (ompt_frame->enter_frame.ptr == NULL)
3758  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3759  OMPT_STORE_RETURN_ADDRESS(global_tid);
3760  }
3761 #endif
3762 #if USE_ITT_NOTIFY
3763  __kmp_threads[global_tid]->th.th_ident = loc;
3764 #endif
3765  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3766 #if OMPT_SUPPORT && OMPT_OPTIONAL
3767  if (ompt_enabled.enabled) {
3768  ompt_frame->enter_frame = ompt_data_none;
3769  }
3770 #endif
3771 
3772  } else if (packed_reduction_method == atomic_reduce_block) {
3773 
3774 #if OMPT_SUPPORT
3775  ompt_frame_t *ompt_frame;
3776  if (ompt_enabled.enabled) {
3777  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3778  if (ompt_frame->enter_frame.ptr == NULL)
3779  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3780  OMPT_STORE_RETURN_ADDRESS(global_tid);
3781  }
3782 #endif
3783 // TODO: implicit barrier: should be exposed
3784 #if USE_ITT_NOTIFY
3785  __kmp_threads[global_tid]->th.th_ident = loc;
3786 #endif
3787  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3788 #if OMPT_SUPPORT && OMPT_OPTIONAL
3789  if (ompt_enabled.enabled) {
3790  ompt_frame->enter_frame = ompt_data_none;
3791  }
3792 #endif
3793 
3794  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3795  tree_reduce_block)) {
3796 
3797  // only master executes here (master releases all other workers)
3798  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3799  global_tid);
3800 
3801  } else {
3802 
3803  // should never reach this block
3804  KMP_ASSERT(0); // "unexpected method"
3805  }
3806  if (teams_swapped) {
3807  __kmp_restore_swapped_teams(th, team, task_state);
3808  }
3809 
3810  if (__kmp_env_consistency_check)
3811  __kmp_pop_sync(global_tid, ct_reduce, loc);
3812 
3813  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3814  global_tid, packed_reduction_method));
3815 
3816  return;
3817 }
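// Illustrative sketch, not part of the library source: the blocking variant of
// the same protocol, reusing example_or_combine and example_reduce_lock from
// the sketch after __kmpc_end_reduce_nowait() above. Unlike the nowait case,
// the atomic path also calls __kmpc_end_reduce() so that the terminating
// barrier in its atomic_reduce_block branch is executed.
static void example_blocking_reduce(ident_t *loc, kmp_int32 gtid,
                                    kmp_uint32 *mask, kmp_uint32 partial) {
  switch (__kmpc_reduce(loc, gtid, 1, sizeof(kmp_uint32), &partial,
                        example_or_combine, &example_reduce_lock)) {
  case 1: // critical path (all threads) or tree path (master only)
    *mask |= partial;
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock);
    break;
  case 2: // atomic path: combine atomically, then still close the construct
    KMP_TEST_THEN_OR32(mask, partial);
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock);
    break;
  default: // 0: tree path, non-master thread; nothing left to do
    break;
  }
}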
3818 
3819 #undef __KMP_GET_REDUCTION_METHOD
3820 #undef __KMP_SET_REDUCTION_METHOD
3821 
3822 /* end of interface to fast scalable reduce routines */
3823 
3824 kmp_uint64 __kmpc_get_taskid() {
3825 
3826  kmp_int32 gtid;
3827  kmp_info_t *thread;
3828 
3829  gtid = __kmp_get_gtid();
3830  if (gtid < 0) {
3831  return 0;
3832  }
3833  thread = __kmp_thread_from_gtid(gtid);
3834  return thread->th.th_current_task->td_task_id;
3835 
3836 } // __kmpc_get_taskid
3837 
3838 kmp_uint64 __kmpc_get_parent_taskid() {
3839 
3840  kmp_int32 gtid;
3841  kmp_info_t *thread;
3842  kmp_taskdata_t *parent_task;
3843 
3844  gtid = __kmp_get_gtid();
3845  if (gtid < 0) {
3846  return 0;
3847  }
3848  thread = __kmp_thread_from_gtid(gtid);
3849  parent_task = thread->th.th_current_task->td_parent;
3850  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3851 
3852 } // __kmpc_get_parent_taskid
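// Illustrative sketch, not part of the library source: the two queries above
// can be combined for lightweight task tracing. The function name is
// hypothetical; both queries return 0 when the calling thread is unknown to
// the runtime.
static void example_trace_task_ids(void) {
  kmp_uint64 id = __kmpc_get_taskid();
  kmp_uint64 parent = __kmpc_get_parent_taskid();
  printf("task %llu (parent %llu)\n", (unsigned long long)id,
         (unsigned long long)parent);
}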
3853 
3865 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3866  const struct kmp_dim *dims) {
3867  int j, idx;
3868  kmp_int64 last, trace_count;
3869  kmp_info_t *th = __kmp_threads[gtid];
3870  kmp_team_t *team = th->th.th_team;
3871  kmp_uint32 *flags;
3872  kmp_disp_t *pr_buf = th->th.th_dispatch;
3873  dispatch_shared_info_t *sh_buf;
3874 
3875  KA_TRACE(
3876  20,
3877  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3878  gtid, num_dims, !team->t.t_serialized));
3879  KMP_DEBUG_ASSERT(dims != NULL);
3880  KMP_DEBUG_ASSERT(num_dims > 0);
3881 
3882  if (team->t.t_serialized) {
3883  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3884  return; // no dependencies if team is serialized
3885  }
3886  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3887  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3888  // the next loop
3889  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3890 
3891  // Save bounds info into allocated private buffer
3892  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3893  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3894  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3895  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3896  pr_buf->th_doacross_info[0] =
3897  (kmp_int64)num_dims; // first element is number of dimensions
3898  // Also save the address of num_done so it can be accessed later without
3899  // knowing the buffer index
3900  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3901  pr_buf->th_doacross_info[2] = dims[0].lo;
3902  pr_buf->th_doacross_info[3] = dims[0].up;
3903  pr_buf->th_doacross_info[4] = dims[0].st;
3904  last = 5;
3905  for (j = 1; j < num_dims; ++j) {
3906  kmp_int64 range_length; // holds the range length of each dimension
3907  // except the first one (dims[0]), which is not kept in the buffer
3908  if (dims[j].st == 1) { // most common case
3909  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3910  range_length = dims[j].up - dims[j].lo + 1;
3911  } else {
3912  if (dims[j].st > 0) {
3913  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3914  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3915  } else { // negative increment
3916  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3917  range_length =
3918  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3919  }
3920  }
3921  pr_buf->th_doacross_info[last++] = range_length;
3922  pr_buf->th_doacross_info[last++] = dims[j].lo;
3923  pr_buf->th_doacross_info[last++] = dims[j].up;
3924  pr_buf->th_doacross_info[last++] = dims[j].st;
3925  }
3926 
3927  // Compute total trip count.
3928  // Start with range of dims[0] which we don't need to keep in the buffer.
3929  if (dims[0].st == 1) { // most common case
3930  trace_count = dims[0].up - dims[0].lo + 1;
3931  } else if (dims[0].st > 0) {
3932  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3933  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3934  } else { // negative increment
3935  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3936  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3937  }
3938  for (j = 1; j < num_dims; ++j) {
3939  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3940  }
3941  KMP_DEBUG_ASSERT(trace_count > 0);
3942 
3943  // Check that the shared buffer is not still occupied by another loop (the
3944  // one that used buffer index idx - __kmp_dispatch_num_buffers)
3945  if (idx != sh_buf->doacross_buf_idx) {
3946  // Shared buffer is occupied, wait for it to be free
3947  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3948  __kmp_eq_4, NULL);
3949  }
3950 #if KMP_32_BIT_ARCH
3951  // Check if we are the first thread. After the CAS the first thread gets 0,
3952  // others get 1 if initialization is in progress, allocated pointer otherwise.
3953  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3954  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3955  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3956 #else
3957  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3958  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3959 #endif
3960  if (flags == NULL) {
3961  // we are the first thread, allocate the array of flags
3962  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3963  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3964  KMP_MB();
3965  sh_buf->doacross_flags = flags;
3966  } else if (flags == (kmp_uint32 *)1) {
3967 #if KMP_32_BIT_ARCH
3968  // initialization is still in progress, need to wait
3969  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3970 #else
3971  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3972 #endif
3973  KMP_YIELD(TRUE);
3974  KMP_MB();
3975  } else {
3976  KMP_MB();
3977  }
3978  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3979  pr_buf->th_doacross_flags =
3980  sh_buf->doacross_flags; // save a private copy so we do not touch
3981  // the shared buffer on each iteration
3982  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3983 }
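// Illustrative note, not part of the library source: the private
// th_doacross_info buffer filled in above is a flat kmp_int64 array consumed
// by __kmpc_doacross_wait()/__kmpc_doacross_post() with the following layout:
//   [0]                  number of dimensions
//   [1]                  address of sh_buf->doacross_num_done
//   [2], [3], [4]        lo, up, st of dimension 0
//   [4*j+1] .. [4*j+4]   range length, lo, up, st of dimension j (j >= 1)
// The accessor below is a hypothetical helper that only documents the layout.
static kmp_int64 example_doacross_dim_range(const kmp_int64 *info, int j) {
  KMP_DEBUG_ASSERT(j >= 1 && j < (int)info[0]);
  return info[4 * j + 1]; // pre-computed trip count of dimension j
}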
3984 
3985 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3986  kmp_int32 shft, num_dims, i;
3987  kmp_uint32 flag;
3988  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3989  kmp_info_t *th = __kmp_threads[gtid];
3990  kmp_team_t *team = th->th.th_team;
3991  kmp_disp_t *pr_buf;
3992  kmp_int64 lo, up, st;
3993 
3994  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3995  if (team->t.t_serialized) {
3996  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3997  return; // no dependencies if team is serialized
3998  }
3999 
4000  // calculate sequential iteration number and check out-of-bounds condition
4001  pr_buf = th->th.th_dispatch;
4002  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4003  num_dims = pr_buf->th_doacross_info[0];
4004  lo = pr_buf->th_doacross_info[2];
4005  up = pr_buf->th_doacross_info[3];
4006  st = pr_buf->th_doacross_info[4];
4007  if (st == 1) { // most common case
4008  if (vec[0] < lo || vec[0] > up) {
4009  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4010  "bounds [%lld,%lld]\n",
4011  gtid, vec[0], lo, up));
4012  return;
4013  }
4014  iter_number = vec[0] - lo;
4015  } else if (st > 0) {
4016  if (vec[0] < lo || vec[0] > up) {
4017  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4018  "bounds [%lld,%lld]\n",
4019  gtid, vec[0], lo, up));
4020  return;
4021  }
4022  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4023  } else { // negative increment
4024  if (vec[0] > lo || vec[0] < up) {
4025  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4026  "bounds [%lld,%lld]\n",
4027  gtid, vec[0], lo, up));
4028  return;
4029  }
4030  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4031  }
4032  for (i = 1; i < num_dims; ++i) {
4033  kmp_int64 iter, ln;
4034  kmp_int32 j = i * 4;
4035  ln = pr_buf->th_doacross_info[j + 1];
4036  lo = pr_buf->th_doacross_info[j + 2];
4037  up = pr_buf->th_doacross_info[j + 3];
4038  st = pr_buf->th_doacross_info[j + 4];
4039  if (st == 1) {
4040  if (vec[i] < lo || vec[i] > up) {
4041  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4042  "bounds [%lld,%lld]\n",
4043  gtid, vec[i], lo, up));
4044  return;
4045  }
4046  iter = vec[i] - lo;
4047  } else if (st > 0) {
4048  if (vec[i] < lo || vec[i] > up) {
4049  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4050  "bounds [%lld,%lld]\n",
4051  gtid, vec[i], lo, up));
4052  return;
4053  }
4054  iter = (kmp_uint64)(vec[i] - lo) / st;
4055  } else { // st < 0
4056  if (vec[i] > lo || vec[i] < up) {
4057  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4058  "bounds [%lld,%lld]\n",
4059  gtid, vec[i], lo, up));
4060  return;
4061  }
4062  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4063  }
4064  iter_number = iter + ln * iter_number;
4065  }
4066  shft = iter_number % 32; // use 32-bit granularity
4067  iter_number >>= 5; // divided by 32
4068  flag = 1 << shft;
4069  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4070  KMP_YIELD(TRUE);
4071  }
4072  KMP_MB();
4073  KA_TRACE(20,
4074  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4075  gtid, (iter_number << 5) + shft));
4076 }
4077 
4078 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4079  kmp_int32 shft, num_dims, i;
4080  kmp_uint32 flag;
4081  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4082  kmp_info_t *th = __kmp_threads[gtid];
4083  kmp_team_t *team = th->th.th_team;
4084  kmp_disp_t *pr_buf;
4085  kmp_int64 lo, st;
4086 
4087  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4088  if (team->t.t_serialized) {
4089  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4090  return; // no dependencies if team is serialized
4091  }
4092 
4093  // calculate sequential iteration number (same as in "wait" but no
4094  // out-of-bounds checks)
4095  pr_buf = th->th.th_dispatch;
4096  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4097  num_dims = pr_buf->th_doacross_info[0];
4098  lo = pr_buf->th_doacross_info[2];
4099  st = pr_buf->th_doacross_info[4];
4100  if (st == 1) { // most common case
4101  iter_number = vec[0] - lo;
4102  } else if (st > 0) {
4103  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4104  } else { // negative increment
4105  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4106  }
4107  for (i = 1; i < num_dims; ++i) {
4108  kmp_int64 iter, ln;
4109  kmp_int32 j = i * 4;
4110  ln = pr_buf->th_doacross_info[j + 1];
4111  lo = pr_buf->th_doacross_info[j + 2];
4112  st = pr_buf->th_doacross_info[j + 4];
4113  if (st == 1) {
4114  iter = vec[i] - lo;
4115  } else if (st > 0) {
4116  iter = (kmp_uint64)(vec[i] - lo) / st;
4117  } else { // st < 0
4118  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4119  }
4120  iter_number = iter + ln * iter_number;
4121  }
4122  shft = iter_number % 32; // use 32-bit granularity
4123  iter_number >>= 5; // divided by 32
4124  flag = 1 << shft;
4125  KMP_MB();
4126  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4127  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4128  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4129  (iter_number << 5) + shft));
4130 }
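// Worked example, illustrative only: both __kmpc_doacross_wait() and
// __kmpc_doacross_post() map the linearized iteration number to a single bit
// of th_doacross_flags. For linearized iteration 70:
//   word index = 70 >> 5  == 2
//   bit  index = 70 % 32  == 6
// so the waiter spins until (th_doacross_flags[2] & (1u << 6)) is set, which
// is exactly the bit the posting thread ORs in for its "source" iteration.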
4131 
4132 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4133  kmp_int32 num_done;
4134  kmp_info_t *th = __kmp_threads[gtid];
4135  kmp_team_t *team = th->th.th_team;
4136  kmp_disp_t *pr_buf = th->th.th_dispatch;
4137 
4138  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4139  if (team->t.t_serialized) {
4140  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4141  return; // nothing to do
4142  }
4143  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4144  if (num_done == th->th.th_team_nproc) {
4145  // we are the last thread, need to free shared resources
4146  int idx = pr_buf->th_doacross_buf_idx - 1;
4147  dispatch_shared_info_t *sh_buf =
4148  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4149  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4150  (kmp_int64)&sh_buf->doacross_num_done);
4151  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4152  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4153  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4154  sh_buf->doacross_flags = NULL;
4155  sh_buf->doacross_num_done = 0;
4156  sh_buf->doacross_buf_idx +=
4157  __kmp_dispatch_num_buffers; // free buffer for future re-use
4158  }
4159  // free private resources (the buffer index must be kept for subsequent loops)
4160  pr_buf->th_doacross_flags = NULL;
4161  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4162  pr_buf->th_doacross_info = NULL;
4163  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4164 }
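// Illustrative sketch, not part of the library source: how a doacross loop
// such as
//   #pragma omp for ordered(1)
//   for (i = lb; i <= ub; i++) {
//     #pragma omp ordered depend(sink : i - 1)
//     ...body...
//     #pragma omp ordered depend(source)
//   }
// maps onto the four entry points above. The function name is hypothetical,
// and the distribution of iterations among threads (normally done by the
// worksharing-loop entry points) is omitted for brevity.
static void example_doacross_codegen(ident_t *loc, int gtid, kmp_int64 lb,
                                     kmp_int64 ub) {
  struct kmp_dim dim = {lb, ub, 1}; // lo, up, st of the single dimension
  __kmpc_doacross_init(loc, gtid, 1, &dim); // called by every team thread
  for (kmp_int64 i = lb; i <= ub; ++i) {
    kmp_int64 sink = i - 1;
    // depend(sink : i - 1); an out-of-bounds sink (i == lb) simply returns,
    // see the bounds checks in __kmpc_doacross_wait() above
    __kmpc_doacross_wait(loc, gtid, &sink);
    /* ... loop body ... */
    __kmpc_doacross_post(loc, gtid, &i); // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid); // every thread calls; the last one cleans up
}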
4165 
4166 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4167 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4168  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4169 }
4170 
4171 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4172  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4173 }
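// Illustrative sketch, not part of the library source: typical user-level
// usage of the two routines above with a predefined OpenMP 5.0 allocator
// handle from omp.h. The function and variable names are hypothetical.
static double *example_alloc_hbw_buffer(size_t n) {
  // Request high-bandwidth memory; with the default allocator traits the
  // allocation falls back to default memory if none is available.
  return (double *)omp_alloc(n * sizeof(double), omp_high_bw_mem_alloc);
}
// The matching release must name the same allocator:
//   omp_free(buf, omp_high_bw_mem_alloc);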
4174 
4175 int __kmpc_get_target_offload(void) {
4176  if (!__kmp_init_serial) {
4177  __kmp_serial_initialize();
4178  }
4179  return __kmp_target_offload;
4180 }
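// Illustrative note: the value returned above is the runtime's tri-state
// offload policy derived from the OMP_TARGET_OFFLOAD environment variable
// (DISABLED / DEFAULT / MANDATORY); the offloading library queries it to
// decide whether host fallback is allowed. A hypothetical check, assuming the
// kmp_target_offload_kind enumerators from kmp.h:
static int example_offload_is_mandatory(void) {
  return __kmpc_get_target_offload() == tgt_mandatory;
}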
4181 
4182 int __kmpc_pause_resource(kmp_pause_status_t level) {
4183  if (!__kmp_init_serial) {
4184  return 1; // Can't pause if runtime is not initialized
4185  }
4186  return __kmp_pause_resource(level);
4187 }
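// Illustrative sketch, not part of the library source: requesting a soft pause
// (the user-level equivalent is omp_pause_resource_all(omp_pause_soft)). The
// function name is hypothetical, and kmp_soft_paused is assumed to be the
// corresponding kmp_pause_status_t enumerator from kmp.h. A non-zero return
// means the request was not honored.
static int example_request_soft_pause(void) {
  return __kmpc_pause_resource(kmp_soft_paused);
}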