#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// __kmpc_begin: by default this is a no-op unless the relevant environment
// switches are set.
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
void __kmpc_end(ident_t *loc) {
  // By default __kmp_ignore_mppend() returns TRUE, which makes this a no-op.
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit on Windows may not let worker threads finish reporting their
  // OMPT events, so shut the library down here when OMPT is active.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
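
/* Editor's note (illustrative sketch, not part of the library itself):
   compiler-generated code typically caches the result of
   __kmpc_global_thread_num() once per function and passes it to subsequent
   __kmpc_* calls as the gtid argument, roughly:

     void outlined_region(ident_t *loc) {
       kmp_int32 gtid = __kmpc_global_thread_num(loc);
       // ... later runtime calls reuse the same gtid ...
     }
*/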
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG
  return TRUE;
#else
  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;
#endif /* KMP_DEBUG */
}

kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
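
/* Editor's note (illustrative sketch, not emitted by this file): for a
   directive such as

     #pragma omp parallel num_threads(4) proc_bind(close)

   a typical lowering records the clauses for the upcoming fork and then
   forks, along the lines of:

     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
     __kmpc_fork_call(&loc, nargs, outlined_fn, shared_args...);

   The numeric proc_bind value is assumed here to follow the kmp_proc_bind_t
   encoding; consult kmp.h for the authoritative values. */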
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, stop the serial timer, record the event,
  // and start the parallel-region timer.
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
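
/* Editor's note (illustrative sketch): for

     #pragma omp teams num_teams(2) thread_limit(8)

   the compiler is expected to call __kmpc_push_num_teams() before
   __kmpc_fork_teams(), mirroring the num_threads case above:

     __kmpc_push_num_teams(&loc, gtid, 2, 8);
     __kmpc_fork_teams(&loc, nargs, teams_outlined_fn, shared_args...);

   If __kmpc_push_num_teams() is not called, __kmpc_fork_teams() picks
   defaults via __kmp_push_num_teams(loc, gtid, 0, 0), as seen above. */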
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation lives in kmp_runtime.cpp so that it can share static
  // functions with __kmp_fork_call.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
     values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  // mfence is an SSE2 instruction; do not execute it if the CPU lacks SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
  // Nothing needed here; the KMP_MB() above suffices.
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // This entry is used both for the explicit 'barrier' directive and for the
  // implicit barrier at the end of a worksharing construct.

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
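
/* Editor's note (illustrative): an explicit "#pragma omp barrier" lowers to a
   single call of the form

     __kmpc_barrier(&loc, gtid);

   and the same entry point is reused for the implicit barrier at the end of a
   worksharing construct when the nowait clause is absent. */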
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
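
/* Editor's note (illustrative): inside an ordered loop the body of
   "#pragma omp ordered" is bracketed by this pair, e.g.

     __kmpc_ordered(&loc, gtid);
     // ... statements of the ordered region ...
     __kmpc_end_ordered(&loc, gtid);

   Both calls dispatch through th_deo_fcn/th_dxo_fcn when the dispatcher has
   installed schedule-specific handlers. */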
#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer for indirect locks when dynamic dispatch is used
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // The unclaimed lock is not destroyed here; it is cleaned up at exit.
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// fast lock macros for the critical-section fast paths
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                      \
  {                                                                           \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                         \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {   \
      kmp_uint32 spins;                                                       \
      KMP_FSYNC_PREPARE(l);                                                   \
      KMP_INIT_YIELD(spins);                                                  \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                      \
      do {                                                                    \
        if (TCR_4(__kmp_nth) >                                                \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {            \
          KMP_YIELD(TRUE);                                                    \
        } else {                                                              \
          KMP_YIELD_SPIN(spins);                                              \
        }                                                                     \
        __kmp_spin_backoff(&backoff);                                         \
      } while (                                                               \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                       \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));  \
    }                                                                         \
    KMP_FSYNC_ACQUIRED(l);                                                    \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                     \
  {                                                                           \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                        \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);     \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                      \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                    \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                    \
    KMP_MB();                                                                 \
    KMP_FSYNC_PREPARE(ftx);                                                   \
    kmp_int32 poll_val;                                                       \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                           \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                        \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {  \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                          \
      if (!cond) {                                                            \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,           \
                                         poll_val |                           \
                                             KMP_LOCK_BUSY(1, futex))) {      \
          continue;                                                           \
        }                                                                     \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                  \
      }                                                                       \
      kmp_int32 rc;                                                           \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,    \
                        NULL, NULL, 0)) != 0) {                               \
        continue;                                                             \
      }                                                                       \
      gtid_code |= 1;                                                         \
    }                                                                         \
    KMP_FSYNC_ACQUIRED(ftx);                                                  \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                   \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),    \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {   \
      KMP_FSYNC_ACQUIRED(ftx);                                                \
      rc = TRUE;                                                              \
    } else {                                                                  \
      rc = FALSE;                                                             \
    }                                                                         \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                    \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    KMP_MB();                                                                 \
    KMP_FSYNC_RELEASING(ftx);                                                 \
    kmp_int32 poll_val =                                                      \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));              \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                       \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                        \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                        \
    }                                                                         \
    KMP_MB();                                                                 \
    KMP_YIELD_OVERSUB();                                                      \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load does not need to be
  // volatile.
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember allocated locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // The critical directive binds to all threads, not just the current team,
  // so the lock must be acquired even in a serialized team.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow
  // the normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);

  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
     for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there is no __kmpc_end_master called, so the (stats) actions of
       __kmpc_end_master are done here */
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only one did the push
         (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return ret;
}
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above; a warning about this case is provided
        // in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
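
/* Editor's note (illustrative, assumes the usual clang lowering): a static
   worksharing loop pairs an init call (defined elsewhere, not in this file)
   with __kmpc_for_static_fini(), roughly:

     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &stride, 1, 1);
     for (i = lb; i <= ub; i += 1) { ... }
     __kmpc_for_static_fini(&loc, gtid);
*/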
void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TODO: we want a per-task implementation of this internal control */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
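
/* Editor's note (hedged): the ompc_* entries above are thin wrappers over the
   internal __kmp_set_*/set__* helpers and mirror the behavior of the
   user-level omp_set_*/omp_get_* routines; exactly which public symbol
   dispatches here depends on the C/Fortran entry-point glue outside this
   file. Illustrative intent only:

     ompc_set_dynamic(0);             // same effect as omp_set_dynamic(0)
     int t = ompc_get_team_size(1);   // team size at nesting level 1
*/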
/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
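
/* Editor's note (illustrative sketch): these helpers follow the OpenMP 5.0
   display/capture affinity API. Mirroring omp_capture_affinity(), the
   expected usage pattern is roughly:

     char buf[256];
     size_t needed = ompc_capture_affinity(buf, sizeof(buf),
                                           "host=%H tid=%n affinity=%A");
     if (needed >= sizeof(buf)) {
       // output was truncated
     }

   As the code above shows, the return value is the size reported as required
   by __kmp_aux_capture_affinity(), regardless of the supplied buffer size. */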
void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  /* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider next barrier a user-visible barrier for barrier region boundaries
  // Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
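
/* Editor's note (illustrative): "#pragma omp single copyprivate(x)" lowers to
   a pattern along these lines, with the compiler supplying the copy helper:

     if (__kmpc_single(&loc, gtid)) {
       x = ...;                        // one thread produces the value
       __kmpc_end_single(&loc, gtid);
       didit = 1;
     }
     __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_fn, didit);

   where copy_fn(void *dst, void *src) copies the broadcast data, matching the
   (*cpy_func)(cpy_data, *data_ptr) call performed above by the threads that
   did not execute the single region. */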
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                             \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}
// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
/* initialize the nest lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nest lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

/* destroy the lock */
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nest lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)user_lock); // itt function will get the right object
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_nest_lock);
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                           \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif
2845 #endif // KMP_USE_DYNAMIC_LOCK 2849 void __kmpc_unset_nest_lock(
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
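/* A short sketch of the non-blocking semantics behind __kmpc_test_lock() and
   __kmpc_test_nest_lock(), assuming a compiler that lowers omp_test_lock() to
   these entry points: the call returns nonzero only if the lock was acquired,
   so a thread can keep doing useful work instead of blocking.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_lock_t lock;
       int hits = 0, misses = 0;
       omp_init_lock(&lock);
     #pragma omp parallel reduction(+ : hits, misses)
       for (int i = 0; i < 1000; ++i) {
         if (omp_test_lock(&lock)) { // acquired: the FTN_TRUE path above
           ++hits;
           omp_unset_lock(&lock);
         } else { // contended: do something else instead of blocking
           ++misses;
         }
       }
       omp_destroy_lock(&lock);
       printf("hits=%d misses=%d\n", hits, misses);
       return 0;
     }
*/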
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If there isn't enough space for the fast critical section, use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If there isn't enough space for the fast critical section, a pointer was
  // stored instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // check that tid == 0
      // Swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore master thread's partition.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so it is kept in a thread-specific variable
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
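/* A runnable sketch of the construct this entry point serves: a worksharing
   reduction with nowait, which a typical OpenMP compiler lowers to a
   __kmpc_reduce_nowait() call whose return value selects the combining path
   (1: combine and call __kmpc_end_reduce_nowait(); 2: combine atomically;
   0: nothing left for this thread to do).

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       double sum = 0.0;
     #pragma omp parallel
       {
     #pragma omp for nowait reduction(+ : sum)
         for (int i = 1; i <= 100000; ++i)
           sum += 1.0 / i;
         // no barrier here; the combined value is guaranteed after the
         // parallel region's closing barrier
       }
       printf("%f\n", sum);
       return 0;
     }
*/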
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required
  } else if (packed_reduction_method == atomic_reduce_block) {
    // code gen does not generate this call for the atomic case
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // only master gets here
  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master deallocates reduction buffer)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
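/* A runnable sketch of a reduction across a teams construct, the case the
   __kmp_swap_teams_for_teams_reduction()/__kmp_restore_swapped_teams()
   helpers above exist for; this assumes an OpenMP 5.0 compiler that supports
   host execution of the teams construct.

     #include <stdio.h>

     int main(void) {
       long long sum = 0;
     #pragma omp teams distribute parallel for reduction(+ : sum)
       for (long long i = 1; i <= 1000000; ++i)
         sum += i;
       printf("%lld\n", sum); // 500000500000
       return 0;
     }
*/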
kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid
kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // ranges of all dimensions but the first
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count, starting with the range of dims[0]
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // After the CAS the first thread gets NULL, others get 1 if initialization
  // is in progress, or the allocated pointer otherwise.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization still in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy to avoid touching the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but without
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
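/* A runnable sketch of the ordered(depend) loop that the doacross entry points
   (__kmpc_doacross_init/wait/post/fini) implement, assuming a compiler that
   lowers depend(sink)/depend(source) clauses to these calls: each iteration
   waits until iteration i-1 has posted before consuming its result.

     #include <stdio.h>

     #define N 1000

     int main(void) {
       static double a[N];
       a[0] = 1.0;
     #pragma omp parallel for ordered(1)
       for (int i = 1; i < N; ++i) {
     #pragma omp ordered depend(sink : i - 1)
         a[i] = a[i - 1] + 1.0; // safe: iteration i-1 already posted
     #pragma omp ordered depend(source)
       }
       printf("%f\n", a[N - 1]);
       return 0;
     }
*/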
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
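/* A minimal sketch of the OpenMP 5.0 allocator routines defined above, which
   are thin wrappers that forward to the gtid-aware __kmpc_alloc()/__kmpc_free()
   entry points:

     #include <omp.h>
     #include <string.h>

     void copy_line(const char *src, size_t n) {
       char *buf = (char *)omp_alloc(n, omp_default_mem_alloc);
       if (buf != NULL) {
         memcpy(buf, src, n);
         // ... use buf ...
         omp_free(buf, omp_default_mem_alloc);
       }
     }
*/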
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // runtime is not initialized, nothing to pause
  }
  return __kmp_pause_resource(level);
}