#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
#endif
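// Task-dependence bookkeeping: every explicit task with a depend clause gets a
// dependence node (kmp_depnode_t), and every parent task owns a hash table
// (kmp_dephash_t) that maps dependence addresses to the last tasks that
// accessed them, so that new tasks can be linked behind their predecessors.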
static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.successors = NULL;
  node->dn.task = NULL; // set once dependences have been processed
  for (int i = 0; i < MAX_MTX_DEPS; ++i)
    node->dn.mtx_locks[i] = NULL;
  node->dn.mtx_num_locks = 0;
  __kmp_init_lock(&node->dn.lock);
  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
}
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
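// The implicit (master) task gets a larger dependence hash than explicit
// tasks, presumably because most depend clauses in a program are attached to
// children of the implicit task; both sizes are prime so that the simple
// shift-and-xor hash below spreads addresses reasonably evenly.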
static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;
  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  kmp_int32 size =
      h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

#ifdef KMP_DEBUG
  h->nelements = 0;
  h->nconflicts = 0;
#endif
  h->buckets = (kmp_dephash_entry **)(h + 1);

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}
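// Each hash entry tracks, per dependence address, the last "out" node
// (last_out), the "in" nodes seen since that out (last_ins), the
// "mutexinoutset" nodes seen since it (last_mtxs), and which of the two lists
// was updated most recently (last_flag). __kmp_process_deps() reads and
// updates this state to decide which predecessor links a new task needs.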
#define ENTRY_LAST_INS 0
#define ENTRY_LAST_MTXS 1

static kmp_dephash_entry *
__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
  kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
    // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    entry->last_out = NULL;
    entry->last_ins = NULL;
    entry->last_mtxs = NULL;
    entry->last_flag = ENTRY_LAST_INS;
    entry->mtx_lock = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
#ifdef KMP_DEBUG
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
#endif
  }
  return entry;
}
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}
static inline void __kmp_track_dependence(kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task here: it is only filled in after the
  // dependences have been processed
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between tasks (a = source, b = sink) in which
     task a blocks the execution of b through the task_dependence callback */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data),
        &(task_sink->ompt_task_info.task_data));
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
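// Both __kmp_depnode_link_successor() variants below use the same
// check / lock / re-check pattern on dn.task: a predecessor whose dn.task has
// already been reset (it finished and released its dependences) must not gain
// new successors, and only links that are actually recorded are counted as
// predecessors of the new task.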
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  if (!plist)
    return 0;
  kmp_int32 npredecessors = 0;
  // link node as successor of every element in the list
  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
    kmp_depnode_t *dep = p->node;
    if (dep->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, dep);
      if (dep->dn.task) {
        __kmp_track_dependence(dep, node, task);
        dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                      "%p\n",
                      gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                      KMP_TASK_TO_TASKDATA(task)));
        npredecessors++;
      }
      KMP_RELEASE_DEPNODE(gtid, dep);
    }
  }
  return npredecessors;
}
// link node as successor of the sink node
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  if (!sink)
    return 0;
  kmp_int32 npredecessors = 0;
  if (sink->dn.task) {
    // synchronously add source to sink's list of successors
    KMP_ACQUIRE_DEPNODE(gtid, sink);
    if (sink->dn.task) {
      __kmp_track_dependence(sink, source, task);
      sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
      KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                    "%p\n",
                    gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                    KMP_TASK_TO_TASKDATA(task)));
      npredecessors++;
    }
    KMP_RELEASE_DEPNODE(gtid, sink);
  }
  return npredecessors;
}
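// __kmp_process_deps() walks one dependence list and, for every address,
// links the new node behind the relevant predecessors:
//   out/inout     -> depends on all pending ins/mtxs (or the last out) and
//                    becomes the new last_out;
//   in            -> depends on the pending mtxs (or the last out) and is
//                    appended to last_ins;
//   mutexinoutset -> depends on the pending ins (or the last out), is appended
//                    to last_mtxs, and registers the per-address mutex lock in
//                    the node.
// The <filter> template parameter skips entries whose base_addr was zeroed by
// the duplicate-folding pass in __kmp_check_deps().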
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;
    kmp_depnode_list_t *last_ins = info->last_ins;
    kmp_depnode_list_t *last_mtxs = info->last_mtxs;
    if (dep->flags.out) { // out/inout --> clean lists of ins and mtxs if any
      if (last_ins || last_mtxs) {
        if (info->last_flag == ENTRY_LAST_INS) { // INS were last
          npredecessors +=
              __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
        } else { // MTXS were last
          npredecessors += __kmp_depnode_link_successor(gtid, thread, task,
                                                        node, last_mtxs);
        }
        __kmp_depnode_list_free(thread, last_ins);
        __kmp_depnode_list_free(thread, last_mtxs);
        info->last_ins = NULL;
        info->last_mtxs = NULL;
      } else { // only last_out is present
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (dep_barrier) {
        // dependence barrier: nothing will be tracked after this point for
        // the address, so drop last_out instead of replacing it
        info->last_out = NULL;
      } else {
        info->last_out = __kmp_node_ref(node);
      }
    } else if (dep->flags.in) {
      // in --> link node to mtxs, no need to link to last_out (cleaned by mtx)
      npredecessors +=
          __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
      __kmp_node_deref(thread, last_out);
      info->last_out = NULL;
      if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last
        // clean the old INS before appending to them again
        __kmp_depnode_list_free(thread, last_ins);
        info->last_ins = NULL;
      } else {
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      info->last_flag = ENTRY_LAST_INS;
      info->last_ins = __kmp_add_node(thread, info->last_ins, node);
    } else { // mutexinoutset dependence
      KMP_DEBUG_ASSERT(dep->flags.mtx == 1);
      // mtx --> link node to ins, no need to link to last_out (cleaned by ins)
      npredecessors +=
          __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
      __kmp_node_deref(thread, last_out);
      info->last_out = NULL;
      if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last
        // clean the old MTXS before appending to them again
        __kmp_depnode_list_free(thread, last_mtxs);
        info->last_mtxs = NULL;
      } else {
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      info->last_flag = ENTRY_LAST_MTXS;
      info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node);
      if (info->mtx_lock == NULL) {
        info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(info->mtx_lock);
      }
      KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
      kmp_int32 m;
      // Save the lock in the node's array, keeping the array sorted in
      // decreasing pointer order so that all mtx tasks acquire their locks in
      // the same global order (avoids a potential livelock between siblings)
      for (m = 0; m < MAX_MTX_DEPS; ++m) {
        if (node->dn.mtx_locks[m] < info->mtx_lock) {
          KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL);
          // insert the new lock here, shifting the smaller pointers right
          for (int n = node->dn.mtx_num_locks; n > m; --n) {
            KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
            node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
          }
          node->dn.mtx_locks[m] = info->mtx_lock;
          break;
        }
      }
      KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must have found a slot
      node->dn.mtx_num_locks++;
    }
  }

  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}
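// __kmp_check_deps() first folds duplicate addresses inside dep_list into a
// single entry. For example, depend(in: x) combined with depend(out: x) on the
// same construct collapses into one inout dependence on x, and a
// mutexinoutset combined with an in or out on the same address is strengthened
// to inout.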
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t *hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0;
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
                "possibly aliased dependencies, %d non-aliased dependencies : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
  // Filter deps in dep_list: fold duplicate addresses into a single entry
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0) {
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          dep_list[i].flags.in |= dep_list[j].flags.in;
          dep_list[i].flags.out |=
              (dep_list[j].flags.out ||
               (dep_list[i].flags.in && dep_list[j].flags.mtx) ||
               (dep_list[i].flags.mtx && dep_list[j].flags.in));
          dep_list[i].flags.mtx =
              dep_list[i].flags.mtx | dep_list[j].flags.mtx &&
              !dep_list[i].flags.out;
          dep_list[j].base_addr = 0; // mark j element as void
        }
      }
      if (dep_list[i].flags.mtx) {
        // limit the number of mtx deps to MAX_MTX_DEPS per node
        if (n_mtxs < MAX_MTX_DEPS && task != NULL) { // not a dependence barrier
          ++n_mtxs;
        } else {
          // downgrade mutexinoutset to inout
          dep_list[i].flags.in = 1;
          dep_list[i].flags.out = 1;
          dep_list[i].flags.mtx = 0;
        }
      }
    }
  }
  // npredecessors is set to -1 so that none of the releasing tasks can queue
  // this task before we have finished processing all of its dependences
  node->dn.npredecessors = -1;

  // pack all npredecessors additions into a single atomic operation at the end
  int npredecessors;

  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
                                           dep_list, task);
  npredecessors += __kmp_process_deps<false>(
      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);

  node->dn.task = task;
  KMP_MB();

  // account for our initial fake value
  npredecessors++;

  // update predecessors and obtain the current value to check if there are
  // still any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task
  return npredecessors > 0 ? true : false;
}
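/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the new task
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued.

Schedule a non-thread-switchable task with dependences for execution.
As a rough illustration (not the exact compiler output), a construct such as
  #pragma omp task depend(inout : x)
is lowered to a kmp_depend_info_t entry whose base_addr is &x and whose in and
out flag bits are both set, followed by a call to this entry point.
*/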
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
          current_task ? &(current_task->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
    new_taskdata->ompt_task_info.deps =
        (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
            thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);

    for (i = 0; i < ndeps; i++) {
      new_taskdata->ompt_task_info.deps[i].variable.ptr =
          (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_type =
            ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_type =
            ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[i].dependence_type =
            ompt_dependence_type_in;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      new_taskdata->ompt_task_info.deps[ndeps + i].variable.ptr =
          (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
            ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
            ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
            ompt_dependence_type_in;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data),
        new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
    /* the memory allocated for the dependences can be freed now */
    KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
    new_taskdata->ompt_task_info.deps = NULL;
    new_taskdata->ompt_task_info.ndeps = 0;
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
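  // Dependence tracking is skipped when tasks are serialized (serialized team,
  // tasking_ser, or an enclosing final task), because sibling tasks then
  // execute immediately and in order; the exception is when proxy tasks have
  // been found, since those may still complete asynchronously.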
  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;
    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependencies: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependencies : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}
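/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependences have been fulfilled
(used, for instance, when an undeferred task carries a depend clause).
*/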
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));

  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  // We can return immediately as:
  //  - dependences are not computed in serial teams (except with proxy tasks)
  //  - if the dephash is not yet created, there is nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore = ignore && thread->th.th_task_team != NULL &&
           thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;

  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependencies : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependencies : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  int thread_finished = FALSE;
  kmp_flag_32 flag((std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

  KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
                gtid, loc_ref));
}