#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
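
/* Determine the global thread id (gtid) of the calling thread. Depending on
   __kmp_gtid_mode this uses, in order of preference: the __thread TDATA slot,
   keyed thread-local storage, or an internal fallback that scans the stack
   bounds recorded in __kmp_threads[] to find which registered thread owns the
   current stack address. Returns KMP_GTID_DNE before gtid bookkeeping is
   initialized. */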
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if we
           are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
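
/* Verify that the calling thread's stack does not overlap the stack of any
   other registered thread. With environment checks enabled this walks the
   whole __kmp_threads[] array and aborts with a StackOverlap fatal message
   (and a hint to change the stack limit) when two recorded ranges intersect. */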
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
/* Spin forever; used as a terminal state when the runtime aborts. */
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}
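
/* Storage-map printing: when storage-map output is enabled the runtime prints
   the address range, size, and (optionally) the NUMA-node placement of each
   internal data structure through __kmp_print_storage_map_gtid() below. */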
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                 (char *)p1 - 1, lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}

static void __kmp_init_allocator() { __kmp_init_memkind(); }
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Check that no other live threads are still registered with the library.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that we are alone; now it is safe to reset the bootstrap locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
  __kmp_reset_lock(&__kmp_stdio_lock);
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating (not FreeLibrary),
      // so worker threads may already be gone with bootstrap locks still held.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }
    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
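
/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   "ordered" region when BUILD_PARALLEL_ORDERED is enabled: the entering thread
   waits until t_ordered.dt.t_value equals its team-local tid, and on exit the
   value is advanced to (tid + 1) % nproc so the next thread in the team may
   proceed. Consistency checking pushes/pops the ordered construct as well. */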
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
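
/* __kmp_enter_single decides whether the calling thread executes a SINGLE
   construct. In a serialized team the answer is trivially yes; otherwise the
   team members race on an atomic compare-and-store of t_construct, and the
   thread that advances it from the previous construct id wins. The matching
   __kmp_exit_single pops the workshare for consistency checking. */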
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
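
/* Work out how many threads can actually be reserved for a new parallel
   region: starting from the requested number, the count is trimmed by the
   dynamic-adjustment mode (load balance, thread limit, or random), then by
   KMP_DEVICE_THREAD_LIMIT, by OMP_THREAD_LIMIT (the contention-group limit),
   and finally by the capacity of the __kmp_threads array, emitting one-time
   warnings when a non-dynamic request cannot be satisfied. Returns the number
   of threads the caller should fork (>= 1). */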
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested "
                "%d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if max_level == 0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
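
/* propagateFPControl() is invoked from __kmp_fork_call() when a team is set up
   and updateHWFPControl() from __kmp_join_call() when it is torn down, so the
   x87/MXCSR state the master establishes before a parallel region is what the
   workers compute with inside it (x86/x86_64 only; no-ops elsewhere). */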
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team, avoiding unnecessary writes to the team structure's cache line.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime checks this flag to decide whether to restore.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to this cache line of the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
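
/* __kmp_serialized_parallel() implements a parallel region that runs with a
   single thread: the calling thread switches onto (or allocates) its serial
   team, bumps t_serialized for nested serialized regions, pushes a fresh
   dispatch buffer, inherits/overrides ICVs for the nested level, and emits the
   OMPT parallel-begin and implicit-task callbacks when OMPT is enabled. */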
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
          codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; we need to allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used; just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions.
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
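
/* __kmp_fork_call() is the main entry point for starting a parallel region.
   It decides between the serialized path, the teams-construct path, and a
   true fork; reserves threads via __kmp_reserve_threads(), allocates or reuses
   a team, copies the microtask arguments, propagates ICVs and FP control,
   releases the workers through __kmp_internal_fork(), and (unless called from
   the GNU interface) invokes the microtask for the master thread itself. */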
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* In GCC on certain architectures the varargs are passed by reference. */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with a
       slightly different offset from each other, to avoid cache aliasing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These magic numbers are bogus but give a distribution of offsets. */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif
  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // AC: This is start of parallel that is nested inside teams construct.
    // The team is actual (hot); no lock needed to initialize it a bit, then
    // free the workers.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    // Increment our nested depth levels, but not increase the serialization
    if (parent_team == master_th->th.th_serial_team) {
      // AC: we are in serialized parallel
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // AC: need this in order enquiry functions work correctly; will restore
      // at join time
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      if ((get__max_active_levels(master_th) == 1 &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads,
                                       enter_teams);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it is freed later, after the team is created.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;
  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;
        // Get args from parent team for teams construct

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // AC: revert the change made in __kmpc_serialized_parallel(), because
        // the initial code in teams should have level = 0
        team->t.t_level--;
        // AC: call the special invoker for the outer "parallel" of teams
        invoker(gtid);
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
        KMP_MB();

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      }
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
// don't use lw_taskteam after linking; content was swapped
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  } // if (nthreads == 1)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure out the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is hot team, it
    // should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }
  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else {
      // only one notification scheme (either "submit" or "forking/joined")
      if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
        __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
      }
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering the fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change the thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }

#if KMP_STATS_ENABLED
  // If this was the beginning of a teams construct, reset the thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
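
/* Helpers for the join path: __kmp_join_restore_state() puts the master back
   into the serial or parallel OMPT work state depending on whether the team it
   returns to is serialized, and __kmp_join_ompt() emits the parallel-end
   callback before restoring that state. */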
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        //     so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams; increment serialization
        // so it can be restored in __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
  }
  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t.t_stack_id); // destroy the stack stitching id
    // after the join barrier
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      !master_th->th.th_teams_microtask) { /* not in teams construct */
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of a parallel
    // inside the teams construct, so that the next parallel can reuse the same
    // (hot) team; only adjust nesting levels here.

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                      codeptr);
    }
#endif

    return;
  }
2406 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2407 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2409 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2414 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2416 if (!master_th->th.th_teams_microtask ||
2417 team->t.t_level > master_th->th.th_teams_level) {
2419 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2421 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2424 if (ompt_enabled.enabled) {
2425 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2426 if (ompt_enabled.ompt_callback_implicit_task) {
2427 int ompt_team_size = team->t.t_nproc;
2428 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2429 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2430 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2433 task_info->frame.exit_frame = ompt_data_none;
2434 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2440 __kmp_pop_current_task_from_thread(master_th);
#if KMP_AFFINITY_SUPPORTED
  master_th->th.th_first_place = team->t.t_first_place;
2445 master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED
  master_th->th.th_def_allocator = team->t.t_def_allocator;
2449 updateHWFPControl(team);
2451 if (root->r.r_active != master_active)
2452 root->r.r_active = master_active;
2454 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2462 master_th->th.th_team = parent_team;
2463 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2464 master_th->th.th_team_master = parent_team->t.t_threads[0];
2465 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2468 if (parent_team->t.t_serialized &&
2469 parent_team != master_th->th.th_serial_team &&
2470 parent_team != root->r.r_root_team) {
2471 __kmp_free_team(root,
2472 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2473 master_th->th.th_serial_team = parent_team;
2476 if (__kmp_tasking_mode != tskm_immediate_exec) {
2477 if (master_th->th.th_task_state_top >
2479 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2481 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2482 master_th->th.th_task_state;
2483 --master_th->th.th_task_state_top;
2485 master_th->th.th_task_state =
2487 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2490 master_th->th.th_task_team =
2491 parent_team->t.t_task_team[master_th->th.th_task_state];
      ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
       __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2501 master_th->th.th_current_task->td_flags.executing = 1;
2503 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2506 if (ompt_enabled.enabled) {
2507 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2518 void __kmp_save_internal_controls(kmp_info_t *thread) {
2520 if (thread->th.th_team != thread->th.th_serial_team) {
2523 if (thread->th.th_team->t.t_serialized > 1) {
2526 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2529 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2530 thread->th.th_team->t.t_serialized) {
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
2539 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2541 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2543 control->next = thread->th.th_team->t.t_control_stack_top;
2544 thread->th.th_team->t.t_control_stack_top = control;
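/* Added commentary (not in the original source): __kmp_set_num_threads is the
   runtime entry behind the user-level omp_set_num_threads() API. A minimal
   user-level sketch (standard OpenMP, nothing runtime-internal):

     #include <omp.h>
     int main(void) {
       omp_set_num_threads(4);   // updates the nthreads-var ICV via this path
       #pragma omp parallel
       { }                       // team of 4 unless otherwise limited
       return 0;
     }

   Note that the routine below also trims the root's hot team when the
   requested size shrinks, so pooled threads are released eagerly. */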
void __kmp_set_num_threads(int new_nth, int gtid) {
  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2555 KMP_DEBUG_ASSERT(__kmp_init_serial);
2559 else if (new_nth > __kmp_max_nth)
2560 new_nth = __kmp_max_nth;
2563 thread = __kmp_threads[gtid];
2564 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2567 __kmp_save_internal_controls(thread);
2569 set__nproc(thread, new_nth);
2574 root = thread->th.th_root;
2575 if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2581 kmp_team_t *hot_team = root->r.r_hot_team;
2584 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2587 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2588 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2589 if (__kmp_tasking_mode != tskm_immediate_exec) {
2592 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2594 __kmp_free_thread(hot_team->t.t_threads[f]);
2595 hot_team->t.t_threads[f] = NULL;
2597 hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
2600 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2601 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2605 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2608 for (f = 0; f < new_nth; f++) {
2609 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2610 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2613 hot_team->t.t_size_changed = -1;
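/* Added commentary (not in the original source): the next pair of routines
   backs omp_set_max_active_levels()/omp_get_max_active_levels(). Values are
   clamped to [0, KMP_MAX_ACTIVE_LEVELS_LIMIT]; negative input is ignored with
   a warning. User-level sketch:

     #include <omp.h>
     omp_set_max_active_levels(2);           // allow two nested active levels
     int lvl = omp_get_max_active_levels();  // reads the ICV stored below
*/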
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
2624 KMP_DEBUG_ASSERT(__kmp_init_serial);
2627 if (max_active_levels < 0) {
2628 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
2638 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2643 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2644 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2645 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));
2655 thread = __kmp_threads[gtid];
2657 __kmp_save_internal_controls(thread);
2659 set__max_active_levels(thread, max_active_levels);
int __kmp_get_max_active_levels(int gtid) {
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2667 KMP_DEBUG_ASSERT(__kmp_init_serial);
2669 thread = __kmp_threads[gtid];
2670 KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
2675 return thread->th.th_current_task->td_icvs.max_active_levels;
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
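/* Added commentary (not in the original source): __kmp_set_schedule maps the
   public kmp_sched_t kinds onto internal enum sched_type values (via
   __kmp_sch_map) and stores the result in the current task's sched ICV.
   Equivalent user-level call, for orientation only:

     #include <omp.h>
     omp_set_schedule(omp_sched_dynamic, 16);  // kind + chunk, handled below
*/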
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_sched_t orig_kind;
  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
2689 KMP_DEBUG_ASSERT(__kmp_init_serial);
2696 kind = __kmp_sched_without_mods(kind);
2698 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2699 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2704 kind = kmp_sched_default;
2708 thread = __kmp_threads[gtid];
2710 __kmp_save_internal_controls(thread);
2712 if (kind < kmp_sched_upper_std) {
2713 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2718 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2719 __kmp_sch_map[kind - kmp_sched_lower - 1];
2724 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2725 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2726 kmp_sched_lower - 2];
2728 __kmp_sched_apply_mods_intkind(
2729 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2730 if (kind == kmp_sched_auto || chunk < 1) {
2732 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2734 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2744 KMP_DEBUG_ASSERT(__kmp_init_serial);
2746 thread = __kmp_threads[gtid];
2748 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2749 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2751 case kmp_sch_static_greedy:
2752 case kmp_sch_static_balanced:
2753 *kind = kmp_sched_static;
2754 __kmp_sched_apply_mods_stdkind(kind, th_type);
2757 case kmp_sch_static_chunked:
2758 *kind = kmp_sched_static;
2760 case kmp_sch_dynamic_chunked:
2761 *kind = kmp_sched_dynamic;
2764 case kmp_sch_guided_iterative_chunked:
2765 case kmp_sch_guided_analytical_chunked:
2766 *kind = kmp_sched_guided;
2769 *kind = kmp_sched_auto;
2771 case kmp_sch_trapezoidal:
2772 *kind = kmp_sched_trapezoidal;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
2776 *kind = kmp_sched_static_steal;
2780 KMP_FATAL(UnknownSchedulingType, th_type);
2783 __kmp_sched_apply_mods_stdkind(kind, th_type);
2784 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
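/* Added commentary (not in the original source): the next two routines back
   omp_get_ancestor_thread_num() and omp_get_team_size(). Both walk up the
   team tree, counting serialized teams as levels. User-level sketch:

     #include <omp.h>
     #pragma omp parallel num_threads(2)
     #pragma omp parallel num_threads(3)
     {
       int up = omp_get_ancestor_thread_num(1);  // tid within the outer team
       int sz = omp_get_team_size(1);            // 2, the outer team's size
     }
*/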
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2794 KMP_DEBUG_ASSERT(__kmp_init_serial);
2801 thr = __kmp_threads[gtid];
2802 team = thr->th.th_team;
2803 ii = team->t.t_level;
2807 if (thr->th.th_teams_microtask) {
2809 int tlevel = thr->th.th_teams_level;
2812 KMP_DEBUG_ASSERT(ii >= tlevel);
2824 return __kmp_tid_from_gtid(gtid);
2826 dd = team->t.t_serialized;
2828 while (ii > level) {
2829 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2831 if ((team->t.t_serialized) && (!dd)) {
2832 team = team->t.t_parent;
2836 team = team->t.t_parent;
2837 dd = team->t.t_serialized;
2842 return (dd > 1) ? (0) : (team->t.t_master_tid);
int __kmp_get_team_size(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2852 KMP_DEBUG_ASSERT(__kmp_init_serial);
2859 thr = __kmp_threads[gtid];
2860 team = thr->th.th_team;
2861 ii = team->t.t_level;
2865 if (thr->th.th_teams_microtask) {
2867 int tlevel = thr->th.th_teams_level;
2870 KMP_DEBUG_ASSERT(ii >= tlevel);
2881 while (ii > level) {
2882 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2884 if (team->t.t_serialized && (!dd)) {
2885 team = team->t.t_parent;
2889 team = team->t.t_parent;
2894 return team->t.t_nproc;
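/* Added commentary (not in the original source): __kmp_get_schedule_global
   folds the OMP_SCHEDULE / KMP_SCHEDULE settings into a concrete
   kmp_r_sched_t: the "static" and "guided" placeholders are replaced by the
   tuned internal variants (__kmp_static, __kmp_guided), schedule modifiers
   are re-applied, and a default chunk is substituted when none was given. */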
2897 kmp_r_sched_t __kmp_get_schedule_global() {
2902 kmp_r_sched_t r_sched;
2908 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
2909 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
2912 r_sched.r_sched_type = __kmp_static;
2915 r_sched.r_sched_type = __kmp_guided;
2917 r_sched.r_sched_type = __kmp_sched;
2919 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
2921 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
2923 r_sched.chunk = KMP_DEFAULT_CHUNK;
2925 r_sched.chunk = __kmp_chunk;
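/* Added commentary (not in the original source): team argument storage uses a
   small-buffer optimization. Up to KMP_INLINE_ARGV_ENTRIES pointers live
   inside the team structure itself (t_inline_argv); larger argument lists are
   page-allocated, and t_max_argc tracks the current capacity so reallocation
   only happens when a fork passes more arguments than seen before. */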
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2935 KMP_DEBUG_ASSERT(team);
2936 if (!realloc || argc > team->t.t_max_argc) {
    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);
2945 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
2947 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2948 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d " 2950 team->t.t_id, team->t.t_max_argc));
2951 team->t.t_argv = &team->t.t_inline_argv[0];
2952 if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
2961 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
2962 ? KMP_MIN_MALLOC_ARGV_ENTRIES
2964 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d " 2966 team->t.t_id, team->t.t_max_argc));
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
2969 if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
2990 team->t.t_max_nproc = max_nth;
2993 for (i = 0; i < num_disp_buff; ++i) {
2994 team->t.t_disp_buffer[i].buffer_index = i;
2995 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2999 static void __kmp_free_team_arrays(kmp_team_t *team) {
3002 for (i = 0; i < team->t.t_max_nproc; ++i) {
3003 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3004 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3005 team->t.t_dispatch[i].th_disp_buffer = NULL;
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
3011 __kmp_free(team->t.t_threads);
3012 __kmp_free(team->t.t_disp_buffer);
3013 __kmp_free(team->t.t_dispatch);
3014 __kmp_free(team->t.t_implicit_task_taskdata);
3015 team->t.t_threads = NULL;
3016 team->t.t_disp_buffer = NULL;
3017 team->t.t_dispatch = NULL;
3018 team->t.t_implicit_task_taskdata = 0;
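/* Added commentary (not in the original source): on reallocation only the
   t_threads pointer array is preserved (copied below); dispatch buffers and
   implicit task data are simply re-created at the new size, since they are
   reinitialized on each fork anyway. */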
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3022 kmp_info_t **oldThreads = team->t.t_threads;
3024 __kmp_free(team->t.t_disp_buffer);
3025 __kmp_free(team->t.t_dispatch);
3026 __kmp_free(team->t.t_implicit_task_taskdata);
3027 __kmp_allocate_team_arrays(team, max_nth);
  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));
3032 __kmp_free(oldThreads);
static kmp_internal_control_t __kmp_get_global_icvs(void) {
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3040 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3042 kmp_internal_control_t g_icvs = {
3044 (kmp_int8)__kmp_global.g.g_dynamic,
3046 (kmp_int8)__kmp_env_blocktime,
3048 __kmp_dflt_blocktime,
3053 __kmp_dflt_team_nth,
3057 __kmp_dflt_max_active_levels,
3061 __kmp_nested_proc_bind.bind_types[0],
3062 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3071 kmp_internal_control_t gx_icvs;
3072 gx_icvs.serial_nesting_level =
3074 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3075 gx_icvs.next = NULL;
3080 static void __kmp_initialize_root(kmp_root_t *root) {
3082 kmp_team_t *root_team;
3083 kmp_team_t *hot_team;
3084 int hot_team_max_nth;
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3087 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3088 KMP_DEBUG_ASSERT(root);
3089 KMP_ASSERT(!root->r.r_begin);
3092 __kmp_init_lock(&root->r.r_begin_lock);
3093 root->r.r_begin = FALSE;
3094 root->r.r_active = FALSE;
3095 root->r.r_in_parallel = 0;
3096 root->r.r_blocktime = __kmp_dflt_blocktime;
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3103 __kmp_allocate_team(root,
3109 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3111 USE_NESTED_HOT_ARG(NULL)
3116 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3121 root->r.r_root_team = root_team;
3122 root_team->t.t_control_stack_top = NULL;
3125 root_team->t.t_threads[0] = NULL;
3126 root_team->t.t_nproc = 1;
3127 root_team->t.t_serialized = 1;
3129 root_team->t.t_sched.sched = r_sched.sched;
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3140 __kmp_allocate_team(root,
3142 __kmp_dflt_team_nth_ub * 2,
3146 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3148 USE_NESTED_HOT_ARG(NULL)
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3152 root->r.r_hot_team = hot_team;
3153 root_team->t.t_control_stack_top = NULL;
3156 hot_team->t.t_parent = root_team;
3159 hot_team_max_nth = hot_team->t.t_max_nproc;
3160 for (f = 0; f < hot_team_max_nth; ++f) {
3161 hot_team->t.t_threads[f] = NULL;
3163 hot_team->t.t_nproc = 1;
3165 hot_team->t.t_sched.sched = r_sched.sched;
3166 hot_team->t.t_size_changed = 0;
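/* Added commentary (not in the original source): the helpers below implement
   __kmp_print_structure(), a debug-only dump of the thread/root/team graph.
   Teams are first accumulated into a singly linked list of
   kmp_team_list_item_t nodes, ordered by team id and deduplicated, and then
   printed in one pass. */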
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
static void __kmp_print_structure_team_accum(kmp_team_list_t list,
                                             kmp_team_p const *team) {
  KMP_DEBUG_ASSERT(list != NULL);
3194 __kmp_print_structure_team_accum(list, team->t.t_parent);
3195 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3199 while (l->next != NULL && l->entry != team) {
3202 if (l->next != NULL) {
3208 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3214 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3215 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
void __kmp_print_structure(void) {
  kmp_team_list_t list;

  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
    __kmp_printf("%2d", gtid);
    if (__kmp_threads != NULL) {
      __kmp_printf(" %p", __kmp_threads[gtid]);
    }
    if (__kmp_root != NULL) {
      __kmp_printf(" %p", __kmp_root[gtid]);
    }
3270 __kmp_printf(
"\n------------------------------\nThreads\n--------------------" 3272 if (__kmp_threads != NULL) {
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf(" Our Root: %p\n", thread->th.th_root);
        __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
        __kmp_print_structure_team(" Serial Team: ",
                                   thread->th.th_serial_team);
        __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread(" Master: ",
                                     thread->th.th_team_master);
        __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
        __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
        __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread(" Next in pool: ",
                                     thread->th.th_next_pool);
3291 __kmp_print_structure_team_accum(list, thread->th.th_team);
3292 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
    __kmp_printf("Threads array is not allocated.\n");
3300 __kmp_printf(
"\n------------------------------\nUbers\n----------------------" 3302 if (__kmp_root != NULL) {
3304 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      __kmp_printf("GTID %2d %p:\n", gtid, root);
      __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
      __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
      __kmp_print_structure_thread(" Uber Thread: ",
                                   root->r.r_uber_thread);
      __kmp_printf(" Active?: %2d\n", root->r.r_active);
      __kmp_printf(" In Parallel: %2d\n",
                   KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3316 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3317 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
    __kmp_printf("Ubers array is not allocated.\n");
3324 __kmp_printf(
"\n------------------------------\nTeams\n----------------------" 3326 while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
    __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
    __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
    __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf(" Thread %2d: ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3345 __kmp_printf(
"\n------------------------------\nPools\n----------------------" 3347 __kmp_print_structure_thread(
"Thread pool: ",
3348 CCAST(kmp_info_t *, __kmp_thread_pool));
3349 __kmp_print_structure_team(
"Team pool: ",
3350 CCAST(kmp_team_t *, __kmp_team_pool));
3354 while (list != NULL) {
3355 kmp_team_list_item_t *item = list;
3357 KMP_INTERNAL_FREE(item);
3366 static const unsigned __kmp_primes[] = {
3367 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3368 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3369 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3370 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3371 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3372 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3373 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3374 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3375 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3376 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3377 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
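/* Added commentary (not in the original source): __kmp_get_random is a simple
   linear congruential generator, x_{n+1} = a * x_n + 1 (mod 2^32), returning
   the high 16 bits; each thread gets its own multiplier 'a' picked from
   __kmp_primes by thread id, so sequences differ per thread. A standalone
   sketch of the same recurrence (hypothetical, outside the runtime):

     static unsigned short next_rand(unsigned *x, unsigned a) {
       unsigned short r = (unsigned short)(*x >> 16); // high bits vary more
       *x = *x * a + 1;                               // LCG step, wraps mod 2^32
       return r;
     }
*/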
3381 unsigned short __kmp_get_random(kmp_info_t *thread) {
3382 unsigned x = thread->th.th_x;
3383 unsigned short r = x >> 16;
3385 thread->th.th_x = x * thread->th.th_a + 1;
  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));
3394 void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;
  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3401 (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
static int __kmp_reclaim_dead_roots(void) {
3410 for (i = 0; i < __kmp_threads_capacity; ++i) {
3411 if (KMP_UBER_GTID(i) &&
3412 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3415 r += __kmp_unregister_root_other_thread(i);
static int __kmp_expand_threads(int nNeed) {
3442 int minimumRequiredCapacity;
3444 kmp_info_t **newThreads;
3445 kmp_root_t **newRoot;
#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  added = __kmp_reclaim_dead_roots();
3483 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3486 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3490 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3492 newCapacity = __kmp_threads_capacity;
3494 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3495 : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3513 if (newCapacity > __kmp_tp_capacity) {
3514 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3515 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3516 __kmp_threadprivate_resize_cache(newCapacity);
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
3520 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
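/* Added commentary (not in the original source): __kmp_expand_threads grows
   __kmp_threads and __kmp_root together. Both arrays are carved out of one
   allocation, the capacity is doubled until the requested headroom fits
   (capped at __kmp_sys_max_nth), the old contents are memcpy'd over, and the
   old block is freed only after the globals point at the new one, so readers
   racing with the swap always see a consistent array. */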
int __kmp_register_root(int initial_thread) {
3530 kmp_info_t *root_thread;
3534 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3552 capacity = __kmp_threads_capacity;
3553 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3558 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3559 if (__kmp_tp_cached) {
3560 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3561 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3562 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3564 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3572 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3576 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3577 KMP_ASSERT(gtid < __kmp_threads_capacity);
3581 TCW_4(__kmp_nth, __kmp_nth + 1);
3585 if (__kmp_adjust_gtid_mode) {
3586 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3587 if (TCR_4(__kmp_gtid_mode) != 2) {
3588 TCW_4(__kmp_gtid_mode, 2);
3591 if (TCR_4(__kmp_gtid_mode) != 1) {
3592 TCW_4(__kmp_gtid_mode, 1);
#ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3601 if (__kmp_nth > __kmp_avail_proc) {
3602 __kmp_zero_bt = TRUE;
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3610 KMP_DEBUG_ASSERT(!root->r.r_root_team);
#if KMP_STATS_ENABLED
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3616 __kmp_stats_thread_ptr->startLife();
3617 KMP_SET_THREAD_STATE(SERIAL_REGION);
3620 __kmp_initialize_root(root);
3623 if (root->r.r_uber_thread) {
3624 root_thread = root->r.r_uber_thread;
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3627 if (__kmp_storage_map) {
3628 __kmp_print_thread_storage_map(root_thread, gtid);
3630 root_thread->th.th_info.ds.ds_gtid = gtid;
3632 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3634 root_thread->th.th_root = root;
3635 if (__kmp_env_consistency_check) {
3636 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3639 __kmp_initialize_fast_memory(root_thread);
3643 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3644 __kmp_initialize_bget(root_thread);
3646 __kmp_init_random(root_thread);
3650 if (!root_thread->th.th_serial_team) {
3651 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3653 root_thread->th.th_serial_team = __kmp_allocate_team(
3658 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3660 KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));
3665 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3667 root->r.r_root_team->t.t_threads[0] = root_thread;
3668 root->r.r_hot_team->t.t_threads[0] = root_thread;
3669 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3671 root_thread->th.th_serial_team->t.t_serialized = 0;
3672 root->r.r_uber_thread = root_thread;
3675 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3676 TCW_4(__kmp_init_gtid, TRUE);
3679 __kmp_gtid_set_specific(gtid);
3682 __kmp_itt_thread_name(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3689 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3691 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3693 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3694 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3695 KMP_INIT_BARRIER_STATE));
3698 for (b = 0; b < bs_last_barrier; ++b) {
3699 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3701 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3705 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3706 KMP_INIT_BARRIER_STATE);
#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3710 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3711 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3712 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3713 if (TCR_4(__kmp_init_middle)) {
3714 __kmp_affinity_set_init_mask(gtid, TRUE);
3717 root_thread->th.th_def_allocator = __kmp_def_allocator;
3718 root_thread->th.th_prev_level = 0;
3719 root_thread->th.th_prev_num_threads = 1;
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3722 tmp->cg_root = root_thread;
3723 tmp->cg_thread_limit = __kmp_cg_max_nth;
3724 tmp->cg_nthreads = 1;
3725 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with" 3726 " cg_nthreads init to 1\n",
3729 root_thread->th.th_cg_roots = tmp;
3731 __kmp_root_counter++;
3734 if (!initial_thread && ompt_enabled.enabled) {
3736 kmp_info_t *root_thread = ompt_get_thread();
3738 ompt_set_thread_state(root_thread, ompt_state_overhead);
3740 if (ompt_enabled.ompt_callback_thread_begin) {
3741 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3742 ompt_thread_initial, __ompt_get_thread_data_internal());
3744 ompt_data_t *task_data;
3745 ompt_data_t *parallel_data;
3746 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3747 if (ompt_enabled.ompt_callback_implicit_task) {
3748 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3749 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3752 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3757 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
3766 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3767 if (!hot_teams || !hot_teams[level].hot_team) {
3770 KMP_DEBUG_ASSERT(level < max_level);
3771 kmp_team_t *team = hot_teams[level].hot_team;
3772 nth = hot_teams[level].hot_team_nth;
3774 if (level < max_level - 1) {
3775 for (i = 0; i < nth; ++i) {
3776 kmp_info_t *th = team->t.t_threads[i];
3777 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3778 if (i > 0 && th->th.th_hot_teams) {
3779 __kmp_free(th->th.th_hot_teams);
3780 th->th.th_hot_teams = NULL;
3784 __kmp_free_team(root, team, NULL);
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3792 kmp_team_t *root_team = root->r.r_root_team;
3793 kmp_team_t *hot_team = root->r.r_hot_team;
3794 int n = hot_team->t.t_nproc;
3797 KMP_DEBUG_ASSERT(!root->r.r_active);
3799 root->r.r_root_team = NULL;
3800 root->r.r_hot_team = NULL;
3803 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
3807 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3808 kmp_info_t *th = hot_team->t.t_threads[i];
3809 if (__kmp_hot_teams_max_level > 1) {
3810 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3812 if (th->th.th_hot_teams) {
3813 __kmp_free(th->th.th_hot_teams);
3814 th->th.th_hot_teams = NULL;
3819 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3824 if (__kmp_tasking_mode != tskm_immediate_exec) {
3825 __kmp_wait_to_unref_task_teams();
3831 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3833 (LPVOID) & (root->r.r_uber_thread->th),
3834 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3835 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3839 ompt_data_t *task_data;
3840 ompt_data_t *parallel_data;
3841 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3842 if (ompt_enabled.ompt_callback_implicit_task) {
3843 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3844 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
3846 if (ompt_enabled.ompt_callback_thread_end) {
3847 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3848 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3854 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3855 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p" 3857 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3858 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
3861 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
3862 root->r.r_uber_thread->th.th_cg_roots->cg_root);
3863 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
3864 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
3865 root->r.r_uber_thread->th.th_cg_roots = NULL;
3867 __kmp_reap_thread(root->r.r_uber_thread, 1);
3871 root->r.r_uber_thread = NULL;
3873 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3883 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3884 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3885 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, " 3888 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3891 kmp_root_t *root = __kmp_root[gtid];
3893 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3894 KMP_ASSERT(KMP_UBER_GTID(gtid));
3895 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3896 KMP_ASSERT(root->r.r_active == FALSE);
3900 kmp_info_t *thread = __kmp_threads[gtid];
3901 kmp_team_t *team = thread->th.th_team;
3902 kmp_task_team_t *task_team = thread->th.th_task_team;
3905 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
3908 thread->th.ompt_thread_info.state = ompt_state_undefined;
3910 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3913 __kmp_reset_root(gtid, root);
3916 __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
3923 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
3925 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
3933 kmp_root_t *root = __kmp_root[gtid];
  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3937 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3938 KMP_ASSERT(KMP_UBER_GTID(gtid));
3939 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3940 KMP_ASSERT(root->r.r_active == FALSE);
3942 r = __kmp_reset_root(gtid, root);
3944 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
3950 void __kmp_task_info() {
3952 kmp_int32 gtid = __kmp_entry_gtid();
3953 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
3954 kmp_info_t *this_thr = __kmp_threads[gtid];
3955 kmp_team_t *steam = this_thr->th.th_serial_team;
3956 kmp_team_t *team = this_thr->th.th_team;
3959 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p " 3961 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
3962 team->t.t_implicit_task_taskdata[tid].td_parent);
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
3974 kmp_info_t *master = team->t.t_threads[0];
3975 KMP_DEBUG_ASSERT(this_thr != NULL);
3976 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
3977 KMP_DEBUG_ASSERT(team);
3978 KMP_DEBUG_ASSERT(team->t.t_threads);
3979 KMP_DEBUG_ASSERT(team->t.t_dispatch);
3980 KMP_DEBUG_ASSERT(master);
3981 KMP_DEBUG_ASSERT(master->th.th_root);
3985 TCW_SYNC_PTR(this_thr->th.th_team, team);
3987 this_thr->th.th_info.ds.ds_tid = tid;
3988 this_thr->th.th_set_nproc = 0;
3989 if (__kmp_tasking_mode != tskm_immediate_exec)
3992 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
3994 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
3995 this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
3999 this_thr->th.th_root = master->th.th_root;
4002 this_thr->th.th_team_nproc = team->t.t_nproc;
4003 this_thr->th.th_team_master = master;
4004 this_thr->th.th_team_serialized = team->t.t_serialized;
4005 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4007 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4012 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4021 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4023 this_thr->th.th_local.this_construct = 0;
4025 if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4028 if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4033 this_thr->th.th_pri_head = NULL;
4036 if (this_thr != master &&
4037 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4039 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4040 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4043 int i = tmp->cg_nthreads--;
4044 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads" 4045 " on node %p of thread %p to %d\n",
4046 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4051 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4053 this_thr->th.th_cg_roots->cg_nthreads++;
4054 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on" 4055 " node %p of thread %p to %d\n",
4056 this_thr, this_thr->th.th_cg_roots,
4057 this_thr->th.th_cg_roots->cg_root,
4058 this_thr->th.th_cg_roots->cg_nthreads));
4059 this_thr->th.th_current_task->td_icvs.thread_limit =
4060 this_thr->th.th_cg_roots->cg_thread_limit;
4065 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4068 sizeof(dispatch_private_info_t) *
4069 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
  KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                team->t.t_max_nproc));
4072 KMP_ASSERT(dispatch);
4073 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4074 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4076 dispatch->th_disp_index = 0;
4077 dispatch->th_doacross_buf_idx = 0;
4078 if (!dispatch->th_disp_buffer) {
4079 dispatch->th_disp_buffer =
4080 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4082 if (__kmp_storage_map) {
4083 __kmp_print_storage_map_gtid(
4084 gtid, &dispatch->th_disp_buffer[0],
4085 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4087 : __kmp_dispatch_num_buffers],
4088 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4089 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4090 gtid, team->t.t_id, gtid);
    memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4096 dispatch->th_dispatch_pr_current = 0;
4097 dispatch->th_dispatch_sh_current = 0;
4099 dispatch->th_deo_fcn = 0;
4100 dispatch->th_dxo_fcn = 0;
4103 this_thr->th.th_next_pool = NULL;
4105 if (!this_thr->th.th_task_state_memo_stack) {
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4109 this_thr->th.th_task_state_top = 0;
4110 this_thr->th.th_task_state_stack_sz = 4;
4111 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4113 this_thr->th.th_task_state_memo_stack[i] = 0;
4116 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4117 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
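/* Added commentary (not in the original source): __kmp_allocate_thread first
   tries to recycle a worker from __kmp_thread_pool (the cheap path: clear the
   pool flags and reinitialize its per-team info); only when the pool is empty
   does it claim a new gtid slot, allocate a fresh kmp_info_t plus its serial
   team, and spawn an OS worker via __kmp_create_worker. */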
4127 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4129 kmp_team_t *serial_team;
4130 kmp_info_t *new_thr;
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4134 KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4141 if (__kmp_thread_pool) {
4142 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4144 if (new_thr == __kmp_thread_pool_insert_pt) {
4145 __kmp_thread_pool_insert_pt = NULL;
4147 TCW_4(new_thr->th.th_in_pool, FALSE);
4148 __kmp_suspend_initialize_thread(new_thr);
4149 __kmp_lock_suspend_mx(new_thr);
4150 if (new_thr->th.th_active_in_pool == TRUE) {
4151 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4152 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4153 new_thr->th.th_active_in_pool = FALSE;
4155 __kmp_unlock_suspend_mx(new_thr);
    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4159 KMP_ASSERT(!new_thr->th.th_team);
4160 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4163 __kmp_initialize_info(new_thr, team, new_tid,
4164 new_thr->th.th_info.ds.ds_gtid);
4165 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4167 TCW_4(__kmp_nth, __kmp_nth + 1);
4169 new_thr->th.th_task_state = 0;
4170 new_thr->th.th_task_state_top = 0;
4171 new_thr->th.th_task_state_stack_sz = 4;
#ifdef KMP_ADJUST_BLOCKTIME
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4177 if (__kmp_nth > __kmp_avail_proc) {
4178 __kmp_zero_bt = TRUE;
4187 kmp_balign_t *balign = new_thr->th.th_bar;
4188 for (b = 0; b < bs_last_barrier; ++b)
4189 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4200 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4201 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4206 if (!TCR_4(__kmp_init_monitor)) {
4207 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4208 if (!TCR_4(__kmp_init_monitor)) {
4209 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4210 TCW_4(__kmp_init_monitor, 1);
4211 __kmp_create_monitor(&__kmp_monitor);
4212 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4223 while (TCR_4(__kmp_init_monitor) < 2) {
4226 KF_TRACE(10, (
"after monitor thread has started\n"));
4229 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4234 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4235 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4241 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4243 if (__kmp_storage_map) {
4244 __kmp_print_thread_storage_map(new_thr, new_gtid);
4249 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
  KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4251 new_thr->th.th_serial_team = serial_team =
4252 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4256 proc_bind_default, &r_icvs,
4257 0 USE_NESTED_HOT_ARG(NULL));
4259 KMP_ASSERT(serial_team);
4260 serial_team->t.t_serialized = 0;
4262 serial_team->t.t_threads[0] = new_thr;
4264 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4268 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4271 __kmp_initialize_fast_memory(new_thr);
4275 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4276 __kmp_initialize_bget(new_thr);
4279 __kmp_init_random(new_thr);
4283 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4284 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4287 kmp_balign_t *balign = new_thr->th.th_bar;
4288 for (b = 0; b < bs_last_barrier; ++b) {
4289 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4290 balign[b].bb.team = NULL;
4291 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4292 balign[b].bb.use_oncore_barrier = 0;
4295 new_thr->th.th_spin_here = FALSE;
4296 new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;
#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4303 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4304 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4305 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4307 new_thr->th.th_def_allocator = __kmp_def_allocator;
4308 new_thr->th.th_prev_level = 0;
4309 new_thr->th.th_prev_num_threads = 1;
4311 TCW_4(new_thr->th.th_in_pool, FALSE);
4312 new_thr->th.th_active_in_pool = FALSE;
4313 TCW_4(new_thr->th.th_active, TRUE);
4321 if (__kmp_adjust_gtid_mode) {
4322 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4323 if (TCR_4(__kmp_gtid_mode) != 2) {
4324 TCW_4(__kmp_gtid_mode, 2);
4327 if (TCR_4(__kmp_gtid_mode) != 1) {
4328 TCW_4(__kmp_gtid_mode, 1);
#ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4337 if (__kmp_nth > __kmp_avail_proc) {
4338 __kmp_zero_bt = TRUE;
4345 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4346 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4348 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4350 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4366 KMP_DEBUG_ASSERT(team && new_icvs);
4367 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4368 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4370 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4372 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4373 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4388 KMP_DEBUG_ASSERT(team);
4389 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4390 KMP_DEBUG_ASSERT(team->t.t_threads);
4393 team->t.t_master_tid = 0;
4395 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4396 team->t.t_nproc = new_nproc;
4399 team->t.t_next_pool = NULL;
4403 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4404 team->t.t_invoke = NULL;
4407 team->t.t_sched.sched = new_icvs->sched.sched;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE;
4411 team->t.t_x87_fpu_control_word = 0;
4412 team->t.t_mxcsr = 0;
4415 team->t.t_construct = 0;
4417 team->t.t_ordered.dt.t_value = 0;
4418 team->t.t_master_active = FALSE;
4421 team->t.t_copypriv_data = NULL;
4424 team->t.t_copyin_counter = 0;
4427 team->t.t_control_stack_top = NULL;
4429 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));

#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4439 if (KMP_AFFINITY_CAPABLE()) {
4441 if (old_mask != NULL) {
4442 status = __kmp_get_system_affinity(old_mask, TRUE);
4445 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4449 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
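/* Added commentary (not in the original source): __kmp_partition_places
   distributes the team over the place partition inherited from the master
   according to proc_bind: 'master' pins every worker to the master's place,
   'close' packs threads into consecutive places, and 'spread' spaces them out
   while narrowing each thread's [first,last] sub-partition. A typical
   user-level setup that exercises this code (environment, not API):

     OMP_PLACES=cores OMP_PROC_BIND=spread ./a.out
*/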
#if KMP_AFFINITY_SUPPORTED

static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4462 kmp_info_t *master_th = team->t.t_threads[0];
4463 KMP_DEBUG_ASSERT(master_th != NULL);
4464 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4465 int first_place = master_th->th.th_first_place;
4466 int last_place = master_th->th.th_last_place;
4467 int masters_place = master_th->th.th_current_place;
4468 team->t.t_first_place = first_place;
4469 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4476 switch (proc_bind) {
4478 case proc_bind_default:
4481 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4484 case proc_bind_master: {
4486 int n_th = team->t.t_nproc;
4487 for (f = 1; f < n_th; f++) {
4488 kmp_info_t *th = team->t.t_threads[f];
4489 KMP_DEBUG_ASSERT(th != NULL);
4490 th->th.th_first_place = first_place;
4491 th->th.th_last_place = last_place;
4492 th->th.th_new_place = masters_place;
4493 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4494 team->t.t_display_affinity != 1) {
4495 team->t.t_display_affinity = 1;
      KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
4505 case proc_bind_close: {
4507 int n_th = team->t.t_nproc;
4509 if (first_place <= last_place) {
4510 n_places = last_place - first_place + 1;
4512 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4514 if (n_th <= n_places) {
4515 int place = masters_place;
4516 for (f = 1; f < n_th; f++) {
4517 kmp_info_t *th = team->t.t_threads[f];
4518 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4527 th->th.th_first_place = first_place;
4528 th->th.th_last_place = last_place;
4529 th->th.th_new_place = place;
4530 if (__kmp_display_affinity && place != th->th.th_current_place &&
4531 team->t.t_display_affinity != 1) {
4532 team->t.t_display_affinity = 1;
        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
4541 int S, rem, gap, s_count;
4542 S = n_th / n_places;
4544 rem = n_th - (S * n_places);
4545 gap = rem > 0 ? n_places / rem : n_places;
4546 int place = masters_place;
4548 for (f = 0; f < n_th; f++) {
4549 kmp_info_t *th = team->t.t_threads[f];
4550 KMP_DEBUG_ASSERT(th != NULL);
4552 th->th.th_first_place = first_place;
4553 th->th.th_last_place = last_place;
4554 th->th.th_new_place = place;
4555 if (__kmp_display_affinity && place != th->th.th_current_place &&
4556 team->t.t_display_affinity != 1) {
4557 team->t.t_display_affinity = 1;
4561 if ((s_count == S) && rem && (gap_ct == gap)) {
4563 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4565 if (place == last_place) {
4566 place = first_place;
4567 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4575 }
else if (s_count == S) {
4576 if (place == last_place) {
4577 place = first_place;
4578 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
            ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
             "partition = [%d,%d]\n",
             __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
             th->th.th_new_place, first_place, last_place));
4593 KMP_DEBUG_ASSERT(place == masters_place);
4597 case proc_bind_spread: {
4599 int n_th = team->t.t_nproc;
4602 if (first_place <= last_place) {
4603 n_places = last_place - first_place + 1;
4605 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4607 if (n_th <= n_places) {
4610 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4611 int S = n_places / n_th;
4612 int s_count, rem, gap, gap_ct;
4614 place = masters_place;
4615 rem = n_places - n_th * S;
4616 gap = rem ? n_th / rem : 1;
4619 if (update_master_only == 1)
4621 for (f = 0; f < thidx; f++) {
4622 kmp_info_t *th = team->t.t_threads[f];
4623 KMP_DEBUG_ASSERT(th != NULL);
4625 th->th.th_first_place = place;
4626 th->th.th_new_place = place;
4627 if (__kmp_display_affinity && place != th->th.th_current_place &&
4628 team->t.t_display_affinity != 1) {
4629 team->t.t_display_affinity = 1;
4632 while (s_count < S) {
4633 if (place == last_place) {
4634 place = first_place;
4635 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4642 if (rem && (gap_ct == gap)) {
4643 if (place == last_place) {
4644 place = first_place;
4645 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4653 th->th.th_last_place = place;
4656 if (place == last_place) {
4657 place = first_place;
4658 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4665 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4666 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4667 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4668 f, th->th.th_new_place, th->th.th_first_place,
4669 th->th.th_last_place, __kmp_affinity_num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4682 if (update_master_only == 1)
4684 for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
4687 KMP_DEBUG_ASSERT(last >= first);
4688 if (first >= n_places) {
4689 if (masters_place) {
4692 if (first == (masters_place + 1)) {
4693 KMP_DEBUG_ASSERT(f == n_th);
4696 if (last == masters_place) {
4697 KMP_DEBUG_ASSERT(f == (n_th - 1));
4701 KMP_DEBUG_ASSERT(f == n_th);
4706 if (last >= n_places) {
4707 last = (n_places - 1);
4712 KMP_DEBUG_ASSERT(0 <= first);
4713 KMP_DEBUG_ASSERT(n_places > first);
4714 KMP_DEBUG_ASSERT(0 <= last);
4715 KMP_DEBUG_ASSERT(n_places > last);
4716 KMP_DEBUG_ASSERT(last_place >= first_place);
4717 th = team->t.t_threads[f];
4718 KMP_DEBUG_ASSERT(th);
4719 th->th.th_first_place = first;
4720 th->th.th_new_place = place;
4721 th->th.th_last_place = last;
4722 if (__kmp_display_affinity && place != th->th.th_current_place &&
4723 team->t.t_display_affinity != 1) {
4724 team->t.t_display_affinity = 1;
4727 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4728 "partition = [%d,%d], spacing = %.4f\n",
4729 __kmp_gtid_from_thread(team->t.t_threads[f]),
4730 team->t.t_id, f, th->th.th_new_place,
4731 th->th.th_first_place, th->th.th_last_place, spacing));
4735 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4737 int S, rem, gap, s_count;
4738 S = n_th / n_places;
4740 rem = n_th - (S * n_places);
4741 gap = rem > 0 ? n_places / rem : n_places;
4742 int place = masters_place;
4745 if (update_master_only == 1)
4747 for (f = 0; f < thidx; f++) {
4748 kmp_info_t *th = team->t.t_threads[f];
4749 KMP_DEBUG_ASSERT(th != NULL);
4751 th->th.th_first_place = place;
4752 th->th.th_last_place = place;
4753 th->th.th_new_place = place;
4754 if (__kmp_display_affinity && place != th->th.th_current_place &&
4755 team->t.t_display_affinity != 1) {
4756 team->t.t_display_affinity = 1;
4760 if ((s_count == S) && rem && (gap_ct == gap)) {
4762 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4764 if (place == last_place) {
4765 place = first_place;
4766 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4774 }
else if (s_count == S) {
4775 if (place == last_place) {
4776 place = first_place;
4777 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4786 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4787 "partition = [%d,%d]\n",
4788 __kmp_gtid_from_thread(team->t.t_threads[f]),
4789 team->t.t_id, f, th->th.th_new_place,
4790 th->th.th_first_place, th->th.th_last_place));
4792 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));

#endif // KMP_AFFINITY_SUPPORTED

kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4815 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4818 int use_hot_team = !root->r.r_active;
  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4822 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4823 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
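  /* Added commentary (not in the original source): the "hot team" is the team
     kept alive on a root (or, with KMP_NESTED_HOT_TEAMS, per nesting level) so
     consecutive parallel regions of similar size can skip thread allocation
     entirely; the branches below only refresh ICVs, shrink, or grow that
     cached team as the requested new_nproc changes. */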
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
4829 team = master->th.th_team;
4830 level = team->t.t_active_level;
4831 if (master->th.th_teams_microtask) {
4832 if (master->th.th_teams_size.nteams > 1 &&
4835 (microtask_t)__kmp_teams_master ||
4836 master->th.th_teams_level <
4842 hot_teams = master->th.th_hot_teams;
4843 if (level < __kmp_hot_teams_max_level && hot_teams &&
4853 if (use_hot_team && new_nproc > 1) {
4854 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
4858 team = root->r.r_hot_team;
4861 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
4871 if (team->t.t_nproc == new_nproc) {
4872 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4875 if (team->t.t_size_changed == -1) {
4876 team->t.t_size_changed = 1;
4878 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4882 kmp_r_sched_t new_sched = new_icvs->sched;
4884 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4886 __kmp_reinitialize_team(team, new_icvs,
4887 root->r.r_uber_thread->th.th_ident);
      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
4891 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          __kmp_partition_places(
              team, 1); // add flag to update only master for spread
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
        __kmp_partition_places(team);
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
4911 }
    else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
4917 #if KMP_NESTED_HOT_TEAMS 4918 if (__kmp_hot_teams_mode == 0) {
4921 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4922 hot_teams[level].hot_team_nth = new_nproc;
4923 #endif // KMP_NESTED_HOT_TEAMS 4925 for (f = new_nproc; f < team->t.t_nproc; f++) {
4926 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4927 if (__kmp_tasking_mode != tskm_immediate_exec) {
4930 team->t.t_threads[f]->th.th_task_team = NULL;
4932 __kmp_free_thread(team->t.t_threads[f]);
4933 team->t.t_threads[f] = NULL;
4935 #if KMP_NESTED_HOT_TEAMS 4940 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4941 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4942 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
4944 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4945 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4947 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4951 #endif // KMP_NESTED_HOT_TEAMS 4952 team->t.t_nproc = new_nproc;
4954 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
4955 __kmp_reinitialize_team(team, new_icvs,
4956 root->r.r_uber_thread->th.th_ident);
4959 for (f = 0; f < new_nproc; ++f) {
4960 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4965 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4966 team->t.t_threads[0], team));
4968 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4971 for (f = 0; f < team->t.t_nproc; f++) {
4972 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4973 team->t.t_threads[f]->th.th_team_nproc ==
4978 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4979 #if KMP_AFFINITY_SUPPORTED 4980 __kmp_partition_places(team);
4983 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4984 kmp_affin_mask_t *old_mask;
4985 if (KMP_AFFINITY_CAPABLE()) {
4986 KMP_CPU_ALLOC(old_mask);
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
4996 #if KMP_NESTED_HOT_TEAMS 4997 int avail_threads = hot_teams[level].hot_team_nth;
4998 if (new_nproc < avail_threads)
4999 avail_threads = new_nproc;
5000 kmp_info_t **other_threads = team->t.t_threads;
5001 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5005 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5006 for (b = 0; b < bs_last_barrier; ++b) {
5007 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5008 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5010 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5014 if (hot_teams[level].hot_team_nth >= new_nproc) {
5017 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5018 team->t.t_nproc = new_nproc;
5024 hot_teams[level].hot_team_nth = new_nproc;
5025 #endif // KMP_NESTED_HOT_TEAMS 5026 if (team->t.t_max_nproc < new_nproc) {
5028 __kmp_reallocate_team_arrays(team, new_nproc);
5029 __kmp_reinitialize_team(team, new_icvs, NULL);
5032 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5037 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5041 for (f = team->t.t_nproc; f < new_nproc; f++) {
5042 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5043 KMP_DEBUG_ASSERT(new_worker);
5044 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5055 kmp_balign_t *balign = new_worker->th.th_bar;
5056 for (b = 0; b < bs_last_barrier; ++b) {
5057 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5058 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5059 KMP_BARRIER_PARENT_FLAG);
5061 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5067 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5068 if (KMP_AFFINITY_CAPABLE()) {
5070 __kmp_set_system_affinity(old_mask, TRUE);
5071 KMP_CPU_FREE(old_mask);
5074 #if KMP_NESTED_HOT_TEAMS 5076 #endif // KMP_NESTED_HOT_TEAMS 5078 int old_nproc = team->t.t_nproc;
5080 __kmp_initialize_team(team, new_nproc, new_icvs,
5081 root->r.r_uber_thread->th.th_ident);
5084 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5085 for (f = 0; f < team->t.t_nproc; ++f)
5086 __kmp_initialize_info(team->t.t_threads[f], team, f,
5087 __kmp_gtid_from_tid(f, team));
5095 for (f = old_nproc; f < team->t.t_nproc; ++f)
5096 team->t.t_threads[f]->th.th_task_state =
5097 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5100 team->t.t_threads[0]->th.th_task_state;
5101 for (f = old_nproc; f < team->t.t_nproc; ++f)
5102 team->t.t_threads[f]->th.th_task_state = old_state;
5106 for (f = 0; f < team->t.t_nproc; ++f) {
5107 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5108 team->t.t_threads[f]->th.th_team_nproc ==
5113 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5114 #if KMP_AFFINITY_SUPPORTED 5115 __kmp_partition_places(team);
5119 kmp_info_t *master = team->t.t_threads[0];
5120 if (master->th.th_teams_microtask) {
5121 for (f = 1; f < new_nproc; ++f) {
5123 kmp_info_t *thr = team->t.t_threads[f];
5124 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5125 thr->th.th_teams_level = master->th.th_teams_level;
5126 thr->th.th_teams_size = master->th.th_teams_size;
5129 #if KMP_NESTED_HOT_TEAMS 5133 for (f = 1; f < new_nproc; ++f) {
5134 kmp_info_t *thr = team->t.t_threads[f];
5136 kmp_balign_t *balign = thr->th.th_bar;
5137 for (b = 0; b < bs_last_barrier; ++b) {
5138 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5139 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5141 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5146 #endif // KMP_NESTED_HOT_TEAMS 5149 __kmp_alloc_argv_entries(argc, team, TRUE);
5150 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5154 KF_TRACE(10, (
" hot_team = %p\n", team));
5157 if (__kmp_tasking_mode != tskm_immediate_exec) {
5158 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 5159 "task_team[1] = %p after reinit\n",
5160 team->t.t_task_team[0], team->t.t_task_team[1]));
5165 __ompt_team_assign_id(team, ompt_parallel_data);
5175 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5178 if (team->t.t_max_nproc >= max_nproc) {
5180 __kmp_team_pool = team->t.t_next_pool;
5183 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5185 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and " 5186 "task_team[1] %p to NULL\n",
5187 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5188 team->t.t_task_team[0] = NULL;
5189 team->t.t_task_team[1] = NULL;
5192 __kmp_alloc_argv_entries(argc, team, TRUE);
5193 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5196 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5197 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5200 for (b = 0; b < bs_last_barrier; ++b) {
5201 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5203 team->t.t_bar[b].b_master_arrived = 0;
5204 team->t.t_bar[b].b_team_arrived = 0;
5209 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);
5227 team = __kmp_reap_team(team);
5228 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5236 team->t.t_max_nproc = max_nproc;
5239 __kmp_allocate_team_arrays(team, max_nproc);
5241 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5242 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5244 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 5246 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5247 team->t.t_task_team[0] = NULL;
5249 team->t.t_task_team[1] = NULL;
5252 if (__kmp_storage_map) {
5253 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5257 __kmp_alloc_argv_entries(argc, team, FALSE);
5258 team->t.t_argc = argc;
5261 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5262 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5265 for (b = 0; b < bs_last_barrier; ++b) {
5266 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5268 team->t.t_bar[b].b_master_arrived = 0;
5269 team->t.t_bar[b].b_team_arrived = 0;
5274 team->t.t_proc_bind = new_proc_bind;
5277 __ompt_team_assign_id(team, ompt_parallel_data);
5278 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  KMP_MB();

  return team;
}
5294 void __kmp_free_team(kmp_root_t *root,
5295 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5301 KMP_DEBUG_ASSERT(root);
5302 KMP_DEBUG_ASSERT(team);
5303 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5304 KMP_DEBUG_ASSERT(team->t.t_threads);
5306 int use_hot_team = team == root->r.r_hot_team;
5307 #if KMP_NESTED_HOT_TEAMS 5309 kmp_hot_team_ptr_t *hot_teams;
5311 level = team->t.t_active_level - 1;
5312 if (master->th.th_teams_microtask) {
5313 if (master->th.th_teams_size.nteams > 1) {
5317 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5318 master->th.th_teams_level == team->t.t_level) {
5323 hot_teams = master->th.th_hot_teams;
5324 if (level < __kmp_hot_teams_max_level) {
5325 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5329 #endif // KMP_NESTED_HOT_TEAMS 5332 TCW_SYNC_PTR(team->t.t_pkfn,
5335 team->t.t_copyin_counter = 0;
5340 if (!use_hot_team) {
5341 if (__kmp_tasking_mode != tskm_immediate_exec) {
5343 for (f = 1; f < team->t.t_nproc; ++f) {
5344 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5345 kmp_info_t *th = team->t.t_threads[f];
5346 volatile kmp_uint32 *state = &th->th.th_reap_state;
5347 while (*state != KMP_SAFE_TO_REAP) {
5351 if (!__kmp_is_thread_alive(th, &ecode)) {
5352 *state = KMP_SAFE_TO_REAP;
5357 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5358 if (fl.is_sleeping())
5359 fl.resume(__kmp_gtid_from_thread(th));
5366 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5367 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5368 if (task_team != NULL) {
5369 for (f = 0; f < team->t.t_nproc; ++f) {
5370 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5371 team->t.t_threads[f]->th.th_task_team = NULL;
5375 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5376 __kmp_get_gtid(), task_team, team->t.t_id));
5377 #if KMP_NESTED_HOT_TEAMS 5378 __kmp_free_task_team(master, task_team);
5380 team->t.t_task_team[tt_idx] = NULL;
5386 team->t.t_parent = NULL;
5387 team->t.t_level = 0;
5388 team->t.t_active_level = 0;
5391 for (f = 1; f < team->t.t_nproc; ++f) {
5392 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5393 __kmp_free_thread(team->t.t_threads[f]);
5394 team->t.t_threads[f] = NULL;
5399 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5400 __kmp_team_pool = (
volatile kmp_team_t *)team;
5403 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5404 team->t.t_threads[1]->th.th_cg_roots);
5405 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5407 for (f = 1; f < team->t.t_nproc; ++f) {
5408 kmp_info_t *thr = team->t.t_threads[f];
5409 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5410 thr->th.th_cg_roots->cg_root == thr);
5412 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5413 thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5417 int i = tmp->cg_nthreads--;
5422 if (thr->th.th_cg_roots)
5423 thr->th.th_current_task->td_icvs.thread_limit =
5424 thr->th.th_cg_roots->cg_thread_limit;
5433 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5434 kmp_team_t *next_pool = team->t.t_next_pool;
5436 KMP_DEBUG_ASSERT(team);
5437 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5438 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5439 KMP_DEBUG_ASSERT(team->t.t_threads);
5440 KMP_DEBUG_ASSERT(team->t.t_argv);
5445 __kmp_free_team_arrays(team);
5446 if (team->t.t_argv != &team->t.t_inline_argv[0])
5447 __kmp_free((
void *)team->t.t_argv);
5479 void __kmp_free_thread(kmp_info_t *this_th) {
5483 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5484 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5486 KMP_DEBUG_ASSERT(this_th);
5491 kmp_balign_t *balign = this_th->th.th_bar;
5492 for (b = 0; b < bs_last_barrier; ++b) {
5493 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5494 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5495 balign[b].bb.team = NULL;
5496 balign[b].bb.leaf_kids = 0;
5498 this_th->th.th_task_state = 0;
5499 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5502 TCW_PTR(this_th->th.th_team, NULL);
5503 TCW_PTR(this_th->th.th_root, NULL);
5504 TCW_PTR(this_th->th.th_dispatch, NULL);
5506 while (this_th->th.th_cg_roots) {
5507 this_th->th.th_cg_roots->cg_nthreads--;
5508 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node" 5509 " %p of thread %p to %d\n",
5510 this_th, this_th->th.th_cg_roots,
5511 this_th->th.th_cg_roots->cg_root,
5512 this_th->th.th_cg_roots->cg_nthreads));
5513 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5514 if (tmp->cg_root == this_th) {
5515 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5517 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5518 this_th->th.th_cg_roots = tmp->up;
5521 if (tmp->cg_nthreads == 0) {
5524 this_th->th.th_cg_roots = NULL;
5534 __kmp_free_implicit_task(this_th);
5535 this_th->th.th_current_task = NULL;
5539 gtid = this_th->th.th_info.ds.ds_gtid;
5540 if (__kmp_thread_pool_insert_pt != NULL) {
5541 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5542 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5543 __kmp_thread_pool_insert_pt = NULL;
5552 if (__kmp_thread_pool_insert_pt != NULL) {
5553 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5555 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5557 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5558 scan = &((*scan)->th.th_next_pool))
5563 TCW_PTR(this_th->th.th_next_pool, *scan);
5564 __kmp_thread_pool_insert_pt = *scan = this_th;
5565 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5566 (this_th->th.th_info.ds.ds_gtid <
5567 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
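  /* __kmp_free_thread() keeps __kmp_thread_pool sorted by ascending gtid and
     caches the last insertion point so that batches of frees insert in nearly
     constant time.  A self-contained sketch of the same sorted insert over a
     plain singly linked list (hypothetical node type, illustrative only): */
#if 0
  struct pool_node {
    int gtid;
    struct pool_node *next;
  };

  // Insert `node` keeping the list sorted by gtid; returns the (possibly new)
  // head of the list.
  static struct pool_node *pool_insert_sorted(struct pool_node *head,
                                              struct pool_node *node) {
    struct pool_node **scan = &head;
    while (*scan != NULL && (*scan)->gtid < node->gtid)
      scan = &(*scan)->next;
    node->next = *scan;
    *scan = node;
    return head;
  }
#endif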
5568 TCW_4(this_th->th.th_in_pool, TRUE);
5569 __kmp_suspend_initialize_thread(this_th);
5570 __kmp_lock_suspend_mx(this_th);
5571 if (this_th->th.th_active == TRUE) {
5572 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5573 this_th->th.th_active_in_pool = TRUE;
5577 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5580 __kmp_unlock_suspend_mx(this_th);
5582 TCW_4(__kmp_nth, __kmp_nth - 1);
5584 #ifdef KMP_ADJUST_BLOCKTIME 5587 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5588 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5589 if (__kmp_nth <= __kmp_avail_proc) {
5590 __kmp_zero_bt = FALSE;
5600 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5601 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5608 if (__kmp_env_consistency_check) {
5609 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5613 ompt_data_t *thread_data;
5614 if (ompt_enabled.enabled) {
5615 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5616 *thread_data = ompt_data_none;
5618 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5619 this_thr->th.ompt_thread_info.wait_id = 0;
5620 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5621 if (ompt_enabled.ompt_callback_thread_begin) {
5622 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5623 ompt_thread_worker, thread_data);
5629 if (ompt_enabled.enabled) {
5630 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5634 while (!TCR_4(__kmp_global.g.g_done)) {
5635 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5639 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5642 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5645 if (ompt_enabled.enabled) {
5646 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5650 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5653 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5655 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
5662 updateHWFPControl(*pteam);
5665 if (ompt_enabled.enabled) {
5666 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5670 rc = (*pteam)->t.t_invoke(gtid);
5674 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5675 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5676 (*pteam)->t.t_pkfn));
5679 if (ompt_enabled.enabled) {
5681 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5683 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5687 __kmp_join_barrier(gtid);
5690 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5693 if (ompt_enabled.ompt_callback_thread_end) {
5694 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5698 this_thr->th.th_task_team = NULL;
5700 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose bits
#endif
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif
  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5734 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5735 __kmp_gtid_set_specific(gtid);
5736 #ifdef KMP_TDATA_GTID 5739 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5783 __kmp_internal_end_library(-1);
5785 __kmp_close_console();
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;
5794 KMP_DEBUG_ASSERT(thread != NULL);
5796 gtid = thread->th.th_info.ds.ds_gtid;
5799 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5802 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5806 ANNOTATE_HAPPENS_BEFORE(thread);
5807 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5808 __kmp_release_64(&flag);
5812 __kmp_reap_worker(thread);
5824 if (thread->th.th_active_in_pool) {
5825 thread->th.th_active_in_pool = FALSE;
5826 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5827 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5831 __kmp_free_implicit_task(thread);
5835 __kmp_free_fast_memory(thread);
5838 __kmp_suspend_uninitialize_thread(thread);
5840 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5841 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5846 #ifdef KMP_ADJUST_BLOCKTIME 5849 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5850 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5851 if (__kmp_nth <= __kmp_avail_proc) {
5852 __kmp_zero_bt = FALSE;
5858 if (__kmp_env_consistency_check) {
5859 if (thread->th.th_cons) {
5860 __kmp_free_cons_stack(thread->th.th_cons);
5861 thread->th.th_cons = NULL;
5865 if (thread->th.th_pri_common != NULL) {
5866 __kmp_free(thread->th.th_pri_common);
5867 thread->th.th_pri_common = NULL;
5870 if (thread->th.th_task_state_memo_stack != NULL) {
5871 __kmp_free(thread->th.th_task_state_memo_stack);
5872 thread->th.th_task_state_memo_stack = NULL;
5876 if (thread->th.th_local.bget_data != NULL) {
5877 __kmp_finalize_bget(thread);
5881 #if KMP_AFFINITY_SUPPORTED 5882 if (thread->th.th_affin_mask != NULL) {
5883 KMP_CPU_FREE(thread->th.th_affin_mask);
5884 thread->th.th_affin_mask = NULL;
5888 #if KMP_USE_HIER_SCHED 5889 if (thread->th.th_hier_bar_data != NULL) {
5890 __kmp_free(thread->th.th_hier_bar_data);
5891 thread->th.th_hier_bar_data = NULL;
5895 __kmp_reap_team(thread->th.th_serial_team);
5896 thread->th.th_serial_team = NULL;
5903 static void __kmp_internal_end(
void) {
5907 __kmp_unregister_library();
5914 __kmp_reclaim_dead_roots();
5918 for (i = 0; i < __kmp_threads_capacity; i++)
5920 if (__kmp_root[i]->r.r_active)
5923 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5925 if (i < __kmp_threads_capacity) {
5937 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5938 if (TCR_4(__kmp_init_monitor)) {
5939 __kmp_reap_monitor(&__kmp_monitor);
5940 TCW_4(__kmp_init_monitor, 0);
5942 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR

    for (i = 0; i < __kmp_threads_capacity; i++) {
5950 if (__kmp_root[i]) {
5953 KMP_ASSERT(!__kmp_root[i]->r.r_active);
5962 while (__kmp_thread_pool != NULL) {
5964 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
5965 __kmp_thread_pool = thread->th.th_next_pool;
5967 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5968 thread->th.th_next_pool = NULL;
5969 thread->th.th_in_pool = FALSE;
5970 __kmp_reap_thread(thread, 0);
5972 __kmp_thread_pool_insert_pt = NULL;
5975 while (__kmp_team_pool != NULL) {
5977 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
5978 __kmp_team_pool = team->t.t_next_pool;
5980 team->t.t_next_pool = NULL;
5981 __kmp_reap_team(team);
5984 __kmp_reap_task_teams();
5991 for (i = 0; i < __kmp_threads_capacity; i++) {
5992 kmp_info_t *thr = __kmp_threads[i];
5993 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
5998 for (i = 0; i < __kmp_threads_capacity; ++i) {
6005 TCW_SYNC_4(__kmp_init_common, FALSE);
6007 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6015 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6016 if (TCR_4(__kmp_init_monitor)) {
6017 __kmp_reap_monitor(&__kmp_monitor);
6018 TCW_4(__kmp_init_monitor, 0);
6020 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6021 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6024 TCW_4(__kmp_init_gtid, FALSE);
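/* Note (summary of __kmp_internal_end above): shutdown unregisters the
   library, reclaims dead roots, and, once no root is active, reaps the monitor
   thread (when KMP_USE_MONITOR), every worker parked in __kmp_thread_pool, the
   cached teams in __kmp_team_pool and the task teams, and finally clears
   __kmp_init_common / __kmp_init_gtid so a later re-initialization starts from
   a clean state. */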
void __kmp_internal_end_library(int gtid_req) {
6040 if (__kmp_global.g.g_abort) {
6041 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6045 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6046 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6054 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6056 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6057 if (gtid == KMP_GTID_SHUTDOWN) {
6058 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system " 6059 "already shutdown\n"));
6061 }
else if (gtid == KMP_GTID_MONITOR) {
6062 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not " 6063 "registered, or system shutdown\n"));
6065 }
else if (gtid == KMP_GTID_DNE) {
6066 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system " 6069 }
else if (KMP_UBER_GTID(gtid)) {
6071 if (__kmp_root[gtid]->r.r_active) {
6072 __kmp_global.g.g_abort = -1;
6073 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6075 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6081 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6082 __kmp_unregister_root_current_thread(gtid);
6089 #ifdef DUMP_DEBUG_ON_EXIT 6090 if (__kmp_debug_buf)
6091 __kmp_dump_debug_buffer();
6097 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6100 if (__kmp_global.g.g_abort) {
6101 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6103 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6106 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6107 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6116 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6119 __kmp_internal_end();
6121 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6122 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6124 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6126 #ifdef DUMP_DEBUG_ON_EXIT 6127 if (__kmp_debug_buf)
6128 __kmp_dump_debug_buffer();
6132 __kmp_close_console();
6135 __kmp_fini_allocator();
6139 void __kmp_internal_end_thread(
int gtid_req) {
6148 if (__kmp_global.g.g_abort) {
6149 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6153 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6154 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6162 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6164 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6165 if (gtid == KMP_GTID_SHUTDOWN) {
6166 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system " 6167 "already shutdown\n"));
6169 }
else if (gtid == KMP_GTID_MONITOR) {
6170 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not " 6171 "registered, or system shutdown\n"));
6173 }
else if (gtid == KMP_GTID_DNE) {
6174 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system " 6178 }
else if (KMP_UBER_GTID(gtid)) {
6180 if (__kmp_root[gtid]->r.r_active) {
6181 __kmp_global.g.g_abort = -1;
6182 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6184 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6188 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6190 __kmp_unregister_root_current_thread(gtid);
6194 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6197 __kmp_threads[gtid]->th.th_task_team = NULL;
6201 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6207 if (__kmp_pause_status != kmp_hard_paused)
6211 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6216 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6219 if (__kmp_global.g.g_abort) {
6220 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6222 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6225 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6226 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6237 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6239 for (i = 0; i < __kmp_threads_capacity; ++i) {
6240 if (KMP_UBER_GTID(i)) {
6243 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6244 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6245 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6252 __kmp_internal_end();
6254 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6255 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6257 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6259 #ifdef DUMP_DEBUG_ON_EXIT 6260 if (__kmp_debug_buf)
6261 __kmp_dump_debug_buffer();
6268 static long __kmp_registration_flag = 0;
6270 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
} // __kmp_reg_status_name

void __kmp_register_library_startup(void) {
6283 char *name = __kmp_reg_status_name();
6289 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6290 __kmp_initialize_system_tick();
6292 __kmp_read_system_time(&time.dtime);
6293 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6306 __kmp_env_set(name, __kmp_registration_str, 0);
6308 value = __kmp_env_get(name);
6309 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6319 char *flag_addr_str = NULL;
6320 char *flag_val_str = NULL;
6321 char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
    file_name = tail;
    if (tail != NULL) {
      long *flag_addr = 0;
      long flag_val = 0;
      KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
      KMP_SSCANF(flag_val_str, "%lx", &flag_val);
      if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6334 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6348 file_name =
"unknown library";
6353 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6354 if (!__kmp_str_match_true(duplicate_ok)) {
6356 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6357 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6359 KMP_INTERNAL_FREE(duplicate_ok);
6360 __kmp_duplicate_library_ok = 1;
6365 __kmp_env_unset(name);
6367 default: { KMP_DEBUG_ASSERT(0); }
break;
    KMP_INTERNAL_FREE((void *)value);
  }
  KMP_INTERNAL_FREE((void *)name);
} // func __kmp_register_library_startup

void __kmp_unregister_library(void) {
6378 char *name = __kmp_reg_status_name();
6379 char *value = __kmp_env_get(name);
6381 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6382 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6383 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6385 __kmp_env_unset(name);
6388 KMP_INTERNAL_FREE(__kmp_registration_str);
6389 KMP_INTERNAL_FREE(value);
6390 KMP_INTERNAL_FREE(name);
6392 __kmp_registration_flag = 0;
6393 __kmp_registration_str = NULL;
6400 #if KMP_MIC_SUPPORTED 6402 static void __kmp_check_mic_type() {
6403 kmp_cpuid_t cpuid_state = {0};
6404 kmp_cpuid_t *cs_p = &cpuid_state;
6405 __kmp_x86_cpuid(1, 0, cs_p);
6407 if ((cs_p->eax & 0xff0) == 0xB10) {
6408 __kmp_mic_type = mic2;
6409 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6410 __kmp_mic_type = mic3;
6412 __kmp_mic_type = non_mic;
static void __kmp_do_serial_initialize(void) {
  int i, gtid;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6434 __kmp_validate_locks();
6437 __kmp_init_allocator();
6442 __kmp_register_library_startup();
6445 if (TCR_4(__kmp_global.g.g_done)) {
6446 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6449 __kmp_global.g.g_abort = 0;
6450 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6453 #if KMP_USE_ADAPTIVE_LOCKS 6454 #if KMP_DEBUG_ADAPTIVE_LOCKS 6455 __kmp_init_speculative_stats();
6458 #if KMP_STATS_ENABLED 6461 __kmp_init_lock(&__kmp_global_lock);
6462 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6463 __kmp_init_lock(&__kmp_debug_lock);
6464 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6465 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6466 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6467 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6468 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6469 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6470 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6471 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6472 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6473 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6474 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6475 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6476 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6477 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6478 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6480 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6482 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6486 __kmp_runtime_initialize();
6488 #if KMP_MIC_SUPPORTED 6489 __kmp_check_mic_type();
6496 __kmp_abort_delay = 0;
6500 __kmp_dflt_team_nth_ub = __kmp_xproc;
6501 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6502 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6504 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6505 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6507 __kmp_max_nth = __kmp_sys_max_nth;
6508 __kmp_cg_max_nth = __kmp_sys_max_nth;
6509 __kmp_teams_max_nth = __kmp_xproc;
6510 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6511 __kmp_teams_max_nth = __kmp_sys_max_nth;
6516 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6518 __kmp_monitor_wakeups =
6519 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6520 __kmp_bt_intervals =
6521 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6524 __kmp_library = library_throughput;
6526 __kmp_static = kmp_sch_static_balanced;
6533 #if KMP_FAST_REDUCTION_BARRIER 6534 #define kmp_reduction_barrier_gather_bb ((int)1) 6535 #define kmp_reduction_barrier_release_bb ((int)1) 6536 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6537 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6538 #endif // KMP_FAST_REDUCTION_BARRIER 6539 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6540 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6541 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6542 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6543 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6544 #if KMP_FAST_REDUCTION_BARRIER 6545 if (i == bs_reduction_barrier) {
6547 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6548 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6549 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6550 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6552 #endif // KMP_FAST_REDUCTION_BARRIER 6554 #if KMP_FAST_REDUCTION_BARRIER 6555 #undef kmp_reduction_barrier_release_pat 6556 #undef kmp_reduction_barrier_gather_pat 6557 #undef kmp_reduction_barrier_release_bb 6558 #undef kmp_reduction_barrier_gather_bb 6559 #endif // KMP_FAST_REDUCTION_BARRIER 6560 #if KMP_MIC_SUPPORTED 6561 if (__kmp_mic_type == mic2) {
6563 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6564 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6566 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6567 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6569 #if KMP_FAST_REDUCTION_BARRIER 6570 if (__kmp_mic_type == mic2) {
6571 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6572 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6574 #endif // KMP_FAST_REDUCTION_BARRIER 6575 #endif // KMP_MIC_SUPPORTED 6579 __kmp_env_checks = TRUE;
6581 __kmp_env_checks = FALSE;
6585 __kmp_foreign_tp = TRUE;
6587 __kmp_global.g.g_dynamic = FALSE;
6588 __kmp_global.g.g_dynamic_mode = dynamic_default;
6590 __kmp_env_initialize(NULL);
6594 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6595 if (__kmp_str_match_true(val)) {
6596 kmp_str_buf_t buffer;
6597 __kmp_str_buf_init(&buffer);
6598 __kmp_i18n_dump_catalog(&buffer);
6599 __kmp_printf(
"%s", buffer.str);
6600 __kmp_str_buf_free(&buffer);
6602 __kmp_env_free(&val);
6605 __kmp_threads_capacity =
6606 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6608 __kmp_tp_capacity = __kmp_default_tp_capacity(
6609 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6614 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6615 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6616 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6617 __kmp_thread_pool = NULL;
6618 __kmp_thread_pool_insert_pt = NULL;
6619 __kmp_team_pool = NULL;
  size_t size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
6633 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6635 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6640 gtid = __kmp_register_root(TRUE);
6641 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6642 KMP_ASSERT(KMP_UBER_GTID(gtid));
6643 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6647 __kmp_common_initialize();
6651 __kmp_register_atfork();
6654 #if !KMP_DYNAMIC_LIB 6658 int rc = atexit(__kmp_internal_end_atexit);
6660 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
6666 #if KMP_HANDLE_SIGNALS 6672 __kmp_install_signals(FALSE);
6675 __kmp_install_signals(TRUE);
6680 __kmp_init_counter++;
6682 __kmp_init_serial = TRUE;
6684 if (__kmp_settings) {
6688 if (__kmp_display_env || __kmp_display_env_verbose) {
6689 __kmp_env_print_2();
6698 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
6702 if (__kmp_init_serial) {
6705 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6706 if (__kmp_init_serial) {
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6710 __kmp_do_serial_initialize();
6711 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
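  /* The wrapper above is the runtime's check / acquire-lock / re-check idiom:
     __kmp_do_serial_initialize() runs exactly once even if several threads
     race into the library.  A generic, self-contained sketch of the same
     idiom using plain pthreads instead of the bootstrap lock (hypothetical
     names; assumes <pthread.h>): */
#if 0
  static int my_init_done = 0;
  static pthread_mutex_t my_init_lock = PTHREAD_MUTEX_INITIALIZER;

  static void my_do_init(void) { /* one-time setup would go here */ }

  static void my_ensure_initialized(void) {
    if (my_init_done) // cheap check without the lock
      return;
    pthread_mutex_lock(&my_init_lock);
    if (!my_init_done) { // re-check while holding the lock
      my_do_init();
      my_init_done = 1;
    }
    pthread_mutex_unlock(&my_init_lock);
  }
#endif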
6714 static void __kmp_do_middle_initialize(
void) {
6716 int prev_dflt_team_nth;
6718 if (!__kmp_init_serial) {
6719 __kmp_do_serial_initialize();
6722 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6726 prev_dflt_team_nth = __kmp_dflt_team_nth;
6728 #if KMP_AFFINITY_SUPPORTED 6731 __kmp_affinity_initialize();
6735 for (i = 0; i < __kmp_threads_capacity; i++) {
6736 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6737 __kmp_affinity_set_init_mask(i, TRUE);
6742 KMP_ASSERT(__kmp_xproc > 0);
6743 if (__kmp_avail_proc == 0) {
6744 __kmp_avail_proc = __kmp_xproc;
6750 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6751 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6756 if (__kmp_dflt_team_nth == 0) {
6757 #ifdef KMP_DFLT_NTH_CORES 6759 __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
6772 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6773 __kmp_dflt_team_nth = KMP_MIN_NTH;
6775 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6776 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6781 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6783 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6788 for (i = 0; i < __kmp_threads_capacity; i++) {
6789 kmp_info_t *thread = __kmp_threads[i];
6792 if (thread->th.th_current_task->td_icvs.nproc != 0)
6795 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6800 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6801 __kmp_dflt_team_nth));
6803 #ifdef KMP_ADJUST_BLOCKTIME 6805 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6806 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6807 if (__kmp_nth > __kmp_avail_proc) {
6808 __kmp_zero_bt = TRUE;
6814 TCW_SYNC_4(__kmp_init_middle, TRUE);
6816 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
6819 void __kmp_middle_initialize(
void) {
6820 if (__kmp_init_middle) {
6823 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6824 if (__kmp_init_middle) {
6825 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6828 __kmp_do_middle_initialize();
6829 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
6833 int gtid = __kmp_entry_gtid();
6836 if (TCR_4(__kmp_init_parallel))
6838 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6839 if (TCR_4(__kmp_init_parallel)) {
6840 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6845 if (TCR_4(__kmp_global.g.g_done)) {
6848 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
6849 __kmp_infinite_loop();
6855 if (!__kmp_init_middle) {
6856 __kmp_do_middle_initialize();
6858 __kmp_resume_if_hard_paused();
6861 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
6862 KMP_ASSERT(KMP_UBER_GTID(gtid));
6864 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6867 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6868 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6869 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6873 #if KMP_HANDLE_SIGNALS 6875 __kmp_install_signals(TRUE);
6879 __kmp_suspend_initialize();
6881 #if defined(USE_LOAD_BALANCE) 6882 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6883 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6886 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6887 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6891 if (__kmp_version) {
6892 __kmp_print_version_2();
6896 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6899 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
6901 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;
6913 this_thr->th.th_local.this_construct = 0;
6914 #if KMP_CACHE_MANAGE 6915 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
6917 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6918 KMP_DEBUG_ASSERT(dispatch);
6919 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6923 dispatch->th_disp_index = 0;
6924 dispatch->th_doacross_buf_idx = 0;
6925 if (__kmp_env_consistency_check)
6926 __kmp_push_parallel(gtid, team->t.t_ident);
6931 void __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6933 if (__kmp_env_consistency_check)
6934 __kmp_pop_parallel(gtid, team->t.t_ident);
6936 __kmp_finish_implicit_task(this_thr);
int __kmp_invoke_task_func(int gtid) {
6941 int tid = __kmp_tid_from_gtid(gtid);
6942 kmp_info_t *this_thr = __kmp_threads[gtid];
6943 kmp_team_t *team = this_thr->th.th_team;
6945 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
6947 if (__itt_stack_caller_create_ptr) {
6948 __kmp_itt_stack_callee_enter(
6950 team->t.t_stack_id);
6953 #if INCLUDE_SSC_MARKS 6954 SSC_MARK_INVOKING();
6959 void **exit_runtime_p;
6960 ompt_data_t *my_task_data;
6961 ompt_data_t *my_parallel_data;
6964 if (ompt_enabled.enabled) {
6966 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
6968 exit_runtime_p = &dummy;
6972 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6973 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6974 if (ompt_enabled.ompt_callback_implicit_task) {
6975 ompt_team_size = team->t.t_nproc;
6976 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6977 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6978 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
6979 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
6983 #if KMP_STATS_ENABLED 6985 if (previous_state == stats_state_e::TEAMS_REGION) {
6986 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
6988 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
6990 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
6993 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6994 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
7001 *exit_runtime_p = NULL;
7004 #if KMP_STATS_ENABLED 7005 if (previous_state == stats_state_e::TEAMS_REGION) {
7006 KMP_SET_THREAD_STATE(previous_state);
7008 KMP_POP_PARTITIONED_TIMER();
7012 if (__itt_stack_caller_create_ptr) {
7013 __kmp_itt_stack_callee_leave(
7015 team->t.t_stack_id);
7018 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7023 void __kmp_teams_master(
int gtid) {
7025 kmp_info_t *thr = __kmp_threads[gtid];
7026 kmp_team_t *team = thr->th.th_team;
7027 ident_t *loc = team->t.t_ident;
7028 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7029 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7030 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7031 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7032 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7035 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
7038 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7039 tmp->cg_nthreads = 1;
7040 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init" 7041 " cg_nthreads to 1\n",
7043 tmp->up = thr->th.th_cg_roots;
7044 thr->th.th_cg_roots = tmp;
7048 #if INCLUDE_SSC_MARKS 7051 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7052 (microtask_t)thr->th.th_teams_microtask,
7053 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7054 #if INCLUDE_SSC_MARKS 7058 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7059 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7062 __kmp_join_call(loc, gtid
7071 int __kmp_invoke_teams_master(
int gtid) {
7072 kmp_info_t *this_thr = __kmp_threads[gtid];
7073 kmp_team_t *team = this_thr->th.th_team;
7075 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7076 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7077 (
void *)__kmp_teams_master);
7079 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7080 __kmp_teams_master(gtid);
7081 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7091 kmp_info_t *thr = __kmp_threads[gtid];
7093 if (num_threads > 0)
7094 thr->th.th_set_nproc = num_threads;
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
7102 KMP_DEBUG_ASSERT(num_teams >= 0);
7103 KMP_DEBUG_ASSERT(num_threads >= 0);
7107 if (num_teams > __kmp_teams_max_nth) {
7108 if (!__kmp_reserve_warn) {
7109 __kmp_reserve_warn = 1;
7110 __kmp_msg(kmp_ms_warning,
7111 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7112 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7114 num_teams = __kmp_teams_max_nth;
7118 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7121 if (num_threads == 0) {
7122 if (!TCR_4(__kmp_init_middle))
7123 __kmp_middle_initialize();
7124 num_threads = __kmp_avail_proc / num_teams;
7125 if (num_teams * num_threads > __kmp_teams_max_nth) {
7127 num_threads = __kmp_teams_max_nth / num_teams;
7132 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7134 if (num_teams * num_threads > __kmp_teams_max_nth) {
7135 int new_threads = __kmp_teams_max_nth / num_teams;
7136 if (!__kmp_reserve_warn) {
7137 __kmp_reserve_warn = 1;
7138 __kmp_msg(kmp_ms_warning,
7139 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7140 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7142 num_threads = new_threads;
7145 thr->th.th_teams_size.nth = num_threads;
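  /* The code above clamps the (num_teams, num_threads) request so that
     num_teams * num_threads never exceeds __kmp_teams_max_nth, deriving
     num_threads from the available processors when the user passed 0.  A
     self-contained sketch of the same arithmetic (hypothetical names only): */
#if 0
  static void clamp_teams_sketch(int *num_teams, int *num_threads,
                                 int avail_proc, int teams_max_nth) {
    if (*num_teams < 1)
      *num_teams = 1; // default number of teams
    if (*num_teams > teams_max_nth)
      *num_teams = teams_max_nth;
    if (*num_threads == 0) {
      *num_threads = avail_proc / *num_teams; // spread procs over the teams
      if (*num_teams * *num_threads > teams_max_nth)
        *num_threads = teams_max_nth / *num_teams;
    } else if (*num_teams * *num_threads > teams_max_nth) {
      *num_threads = teams_max_nth / *num_teams; // shrink each team instead
    }
  }
#endif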
7149 void __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind) {
7150 kmp_info_t *thr = __kmp_threads[gtid];
7151 thr->th.th_set_proc_bind = proc_bind;
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7157 kmp_info_t *this_thr = __kmp_threads[gtid];
7163 KMP_DEBUG_ASSERT(team);
7164 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7165 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7168 team->t.t_construct = 0;
7169 team->t.t_ordered.dt.t_value =
7173 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7174 if (team->t.t_max_nproc > 1) {
7176 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7177 team->t.t_disp_buffer[i].buffer_index = i;
7178 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7181 team->t.t_disp_buffer[0].buffer_index = 0;
7182 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7186 KMP_ASSERT(this_thr->th.th_team == team);
7189 for (f = 0; f < team->t.t_nproc; f++) {
7190 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7191 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7196 __kmp_fork_barrier(gtid, 0);
7199 void __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team) {
7200 kmp_info_t *this_thr = __kmp_threads[gtid];
7202 KMP_DEBUG_ASSERT(team);
7203 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7204 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7210 if (__kmp_threads[gtid] &&
7211 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
7218 __kmp_print_structure();
7220 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7221 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7224 __kmp_join_barrier(gtid);
7226 if (ompt_enabled.enabled &&
7227 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7228 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7229 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7230 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7232 void *codeptr = NULL;
7233 if (KMP_MASTER_TID(ds_tid) &&
7234 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7235 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7236 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7238 if (ompt_enabled.ompt_callback_sync_region_wait) {
7239 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7240 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7243 if (ompt_enabled.ompt_callback_sync_region) {
7244 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7245 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7249 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7250 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7251 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
7257 KMP_ASSERT(this_thr->th.th_team == team);
7262 #ifdef USE_LOAD_BALANCE 7266 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7269 kmp_team_t *hot_team;
7271 if (root->r.r_active) {
7274 hot_team = root->r.r_hot_team;
7275 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7276 return hot_team->t.t_nproc - 1;
7281 for (i = 1; i < hot_team->t.t_nproc; i++) {
7282 if (hot_team->t.t_threads[i]->th.th_active) {
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7294 int hot_team_active;
7295 int team_curr_active;
7298 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7300 KMP_DEBUG_ASSERT(root);
7301 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7302 ->th.th_current_task->td_icvs.dynamic == TRUE);
7303 KMP_DEBUG_ASSERT(set_nproc > 1);
7305 if (set_nproc == 1) {
7306 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
7315 pool_active = __kmp_thread_pool_active_nth;
7316 hot_team_active = __kmp_active_hot_team_nproc(root);
7317 team_curr_active = pool_active + hot_team_active + 1;
7320 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7321 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d " 7322 "hot team active = %d\n",
7323 system_active, pool_active, hot_team_active));
7325 if (system_active < 0) {
7329 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7330 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
7333 retval = __kmp_avail_proc - __kmp_nth +
7334 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7335 if (retval > set_nproc) {
7338 if (retval < KMP_MIN_NTH) {
7339 retval = KMP_MIN_NTH;
7342 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7350 if (system_active < team_curr_active) {
7351 system_active = team_curr_active;
7353 retval = __kmp_avail_proc - system_active + team_curr_active;
7354 if (retval > set_nproc) {
7357 if (retval < KMP_MIN_NTH) {
7358 retval = KMP_MIN_NTH;
7361 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));
7375 if (TCR_4(__kmp_init_parallel)) {
7376 #if KMP_HANDLE_SIGNALS 7377 __kmp_remove_signals();
7379 TCW_4(__kmp_init_parallel, FALSE);
7382 if (TCR_4(__kmp_init_middle)) {
7383 #if KMP_AFFINITY_SUPPORTED 7384 __kmp_affinity_uninitialize();
7386 __kmp_cleanup_hierarchy();
7387 TCW_4(__kmp_init_middle, FALSE);
7390 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
7392 if (__kmp_init_serial) {
7393 __kmp_runtime_destroy();
7394 __kmp_init_serial = FALSE;
7397 __kmp_cleanup_threadprivate_caches();
7399 for (f = 0; f < __kmp_threads_capacity; f++) {
7400 if (__kmp_root[f] != NULL) {
7401 __kmp_free(__kmp_root[f]);
7402 __kmp_root[f] = NULL;
7405 __kmp_free(__kmp_threads);
7408 __kmp_threads = NULL;
7410 __kmp_threads_capacity = 0;
7412 #if KMP_USE_DYNAMIC_LOCK 7413 __kmp_cleanup_indirect_user_locks();
7415 __kmp_cleanup_user_locks();
7418 #if KMP_AFFINITY_SUPPORTED 7419 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
7420 __kmp_cpuinfo_file = NULL;
7423 #if KMP_USE_ADAPTIVE_LOCKS 7424 #if KMP_DEBUG_ADAPTIVE_LOCKS 7425 __kmp_print_speculative_stats();
7428 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7429 __kmp_nested_nth.nth = NULL;
7430 __kmp_nested_nth.size = 0;
7431 __kmp_nested_nth.used = 0;
7432 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7433 __kmp_nested_proc_bind.bind_types = NULL;
7434 __kmp_nested_proc_bind.size = 0;
7435 __kmp_nested_proc_bind.used = 0;
7436 if (__kmp_affinity_format) {
7437 KMP_INTERNAL_FREE(__kmp_affinity_format);
7438 __kmp_affinity_format = NULL;
7441 __kmp_i18n_catclose();
7443 #if KMP_USE_HIER_SCHED 7444 __kmp_hier_scheds.deallocate();
7447 #if KMP_STATS_ENABLED 7451 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
7456 int __kmp_ignore_mppbeg(
void) {
7459 if ((env = getenv(
"KMP_IGNORE_MPPBEG")) != NULL) {
7460 if (__kmp_str_match_false(env))
7467 int __kmp_ignore_mppend(
void) {
7470 if ((env = getenv(
"KMP_IGNORE_MPPEND")) != NULL) {
7471 if (__kmp_str_match_false(env))
7478 void __kmp_internal_begin(
void) {
7484 gtid = __kmp_entry_gtid();
7485 root = __kmp_threads[gtid]->th.th_root;
7486 KMP_ASSERT(KMP_UBER_GTID(gtid));
7488 if (root->r.r_begin)
7490 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7491 if (root->r.r_begin) {
7492 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7496 root->r.r_begin = TRUE;
7498 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7503 void __kmp_user_set_library(
enum library_type arg) {
7510 gtid = __kmp_entry_gtid();
7511 thread = __kmp_threads[gtid];
7513 root = thread->th.th_root;
7515 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7517 if (root->r.r_in_parallel) {
7519 KMP_WARNING(SetLibraryIncorrectCall);
7524 case library_serial:
7525 thread->th.th_set_nproc = 0;
7526 set__nproc(thread, 1);
7528 case library_turnaround:
7529 thread->th.th_set_nproc = 0;
7530 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7531 : __kmp_dflt_team_nth_ub);
7533 case library_throughput:
7534 thread->th.th_set_nproc = 0;
7535 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7536 : __kmp_dflt_team_nth_ub);
7539 KMP_FATAL(UnknownLibraryType, arg);
7542 __kmp_aux_set_library(arg);
7545 void __kmp_aux_set_stacksize(
size_t arg) {
7546 if (!__kmp_init_serial)
7547 __kmp_serial_initialize();
7550 if (arg & (0x1000 - 1)) {
7551 arg &= ~(0x1000 - 1);
7556 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7559 if (!TCR_4(__kmp_init_parallel)) {
7562 if (value < __kmp_sys_min_stksize)
7563 value = __kmp_sys_min_stksize;
7564 else if (value > KMP_MAX_STKSIZE)
7565 value = KMP_MAX_STKSIZE;
7567 __kmp_stksize = value;
7569 __kmp_env_stksize = TRUE;
7572 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7577 void __kmp_aux_set_library(
enum library_type arg) {
7578 __kmp_library = arg;
7580 switch (__kmp_library) {
7581 case library_serial: {
7582 KMP_INFORM(LibraryIsSerial);
7584 case library_turnaround:
7585 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
7586 __kmp_use_yield = 2;
7588 case library_throughput:
7589 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
7590 __kmp_dflt_blocktime = 200;
7593 KMP_FATAL(UnknownLibraryType, arg);
/* Getting team information common for all team API */
// Returns NULL if not in a teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (1 team of 1 thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
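/* Usage sketch (not part of this file): __kmp_aux_get_team_num() and
   __kmp_aux_get_num_teams() back the standard omp_get_team_num() and
   omp_get_num_teams() queries inside a teams construct.  A hypothetical
   host-side example: */
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp teams num_teams(4)
  {
    // Each initial thread of a team reports its team number out of the total
    // number of teams in the league.
    printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  }
  return 0;
}
#endif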
// One entry of the affinity-format field table: the single-character
// specifier, its long name, and the printf conversion used to print it.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g., L -> nesting level
  const char *long_name; // from spec, e.g., nesting_level
  char field_format; // data type for snprintf ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
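/* Usage sketch (not part of this file): the table above defines the fields
   accepted in OMP_AFFINITY_FORMAT and omp_set_affinity_format().  Each field
   is written as %<short_name> or %{<long_name>}, optionally with "0", "." and
   a minimum width between the '%' and the name.  A hypothetical example: */
#if 0
#include <omp.h>

int main(void) {
  // Equivalent to setting the OMP_AFFINITY_FORMAT environment variable.
  omp_set_affinity_format("OMP: pid %P tid %i thread %0.3n affinity %A");

#pragma omp parallel
  { omp_display_affinity(NULL); } // NULL => use the format set above
  return 0;
}
#endif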
// Capture the affinity-format field at *ptr (which must point at '%') into
// field_buffer.  Returns the number of characters added.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      int length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the implementation has no info for a field type, print "undefined"
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Fill buffer by expanding format (or the affinity-format-var ICV if format
// is NULL or empty).  Returns the number of characters in the expansion.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the expanded affinity format for this thread on kmp_out
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
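/* Usage sketch (not part of this file): __kmp_aux_capture_affinity() and
   __kmp_aux_display_affinity() implement the OpenMP 5.0 entry points
   omp_capture_affinity() and omp_display_affinity().  A hypothetical example
   that captures the expansion into a user buffer instead of printing it: */
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
    char buf[256];
    // Returns the number of characters needed (excluding the terminating
    // NUL); a non-empty explicit format overrides the affinity-format-var ICV.
    size_t n = omp_capture_affinity(buf, sizeof(buf),
                                    "thread %n of %N bound to %A");
    if (n < sizeof(buf))
      printf("%s\n", buf);
  }
  return 0;
}
#endif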
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
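/* Usage sketch (not part of this file): this routine backs the exported
   kmp_set_blocktime() extension and the KMP_BLOCKTIME environment variable.
   Blocktime is how long (in milliseconds) a worker keeps spinning after a
   parallel region before going to sleep.  A hypothetical example: */
#if 0
#include <omp.h>

int main(void) {
  // Make idle workers sleep immediately; useful when the OpenMP code shares
  // the machine with other busy processes.
  kmp_set_blocktime(0);

#pragma omp parallel
  {
    /* ... */
  }
  return 0;
}
#endif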
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
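/* Usage sketch (not part of this file): the exported kmp_set_defaults()
   extension routes here.  It re-runs environment-style initialization with
   the given string, so it is intended to be called early, before the first
   parallel region.  A hypothetical example: */
#if 0
#include <omp.h>

int main(void) {
  // Same syntax as the corresponding environment variable.
  kmp_set_defaults("KMP_BLOCKTIME=0");

#pragma omp parallel
  {
    /* ... */
  }
  return 0;
}
#endif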
// Called from __kmpc_reduce() and __kmpc_reduce_nowait() to pick the
// reduction method for a reduction clause.
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method; it may be upgraded
  // to an atomic or tree reduction depending on what the compiler generated
  // and on the platform/team-size tuning below.
  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||        \
    KMP_OS_KFREEBSD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD ||         \
    KMP_OS_KFREEBSD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;
    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;
    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;
    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
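/* Usage sketch (not part of this file): __kmp_determine_reduction_method() is
   consulted when the compiler lowers a reduction clause to
   __kmpc_reduce()/__kmpc_reduce_nowait().  For a small scalar reduction like
   the one below, the atomic or critical path is typically chosen; large teams
   on 64-bit targets may instead use the tree reduction with a reduction
   barrier.  A hypothetical example that exercises the selection: */
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  double sum = 0.0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000000; ++i)
    sum += 1.0 / (i + 1.0);
  printf("sum = %f\n", sum);
  return 0;
}
#endif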
// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely.  Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// The resource argument is currently ignored.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // runtime is not paused, so cannot resume
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    return 1; // invalid level
  }
}
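/* Usage sketch (not part of this file): __kmp_pause_resource() implements the
   OpenMP 5.0 omp_pause_resource()/omp_pause_resource_all() entry points.  A
   soft pause puts worker threads to sleep while keeping runtime state; a hard
   pause tears the runtime down until OpenMP is used again.  A hypothetical
   example: */
#if 0
#include <omp.h>

int main(void) {
#pragma omp parallel
  {
    /* ... initial OpenMP work ... */
  }

  // Release OpenMP resources on all devices while the application runs a long
  // non-OpenMP phase; returns 0 on success.
  omp_pause_resource_all(omp_pause_soft);

  /* ... long serial or non-OpenMP phase ... */

#pragma omp parallel
  {
    /* the runtime resumes automatically on next use */
  }
  return 0;
}
#endif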