31 #include "ompt-specific.cpp" 37 #define ompt_get_callback_success 1 38 #define ompt_get_callback_failure 0 40 #define no_tool_present 0 42 #define OMPT_API_ROUTINE static 44 #ifndef OMPT_STR_MATCH 45 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) 53 const char *state_name;
54 ompt_state_t state_id;
60 } kmp_mutex_impl_info_t;
// Activation flags: routines in this file test ompt_enabled.enabled and the
// per-callback members (e.g. ompt_enabled.ompt_callback_thread_begin) before
// doing any tool-related work.
73 ompt_callbacks_active_t ompt_enabled;
// Table of {name, id} entries generated by expanding FOREACH_OMPT_STATE with
// ompt_state_macro; scanned by ompt_enumerate_states().
75 ompt_state_info_t ompt_state_info[] = {
76 #define ompt_state_macro(state, code) {#state, state}, 77 FOREACH_OMPT_STATE(ompt_state_macro)
// Same construction for mutex implementations (FOREACH_KMP_MUTEX_IMPL);
// scanned by ompt_enumerate_mutex_impls().
78 #undef ompt_state_macro 81 kmp_mutex_impl_info_t kmp_mutex_impl_info[] = {
82 #define kmp_mutex_impl_macro(name, id) {#name, name}, 83 FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)
// ompt_callbacks holds the callback pointers stored by ompt_set_callback().
84 #undef kmp_mutex_impl_macro 87 ompt_callbacks_internal_t ompt_callbacks;
// Result handed back by the tool start routine; NULL until ompt_pre_init()
// assigns it.
89 static ompt_start_tool_result_t *ompt_start_tool_result = NULL;
// Forward declarations; both routines are defined further down in this file.
95 static ompt_interface_fn_t ompt_fn_lookup(
const char *s);
97 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(
void);
103 typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(
unsigned int,
// Resolves the symbol "ompt_start_tool" with dlsym(RTLD_DEFAULT, ...) and
// calls it with the OpenMP version and the runtime version string; the result
// is stored in ret, which starts out NULL.
// NOTE(review): the embedded numbering jumps 122 -> 124 and nothing after 124
// is visible, so a NULL check on start_tool and the return statement cannot be
// confirmed from this view.
117 static ompt_start_tool_result_t *ompt_tool_darwin(
unsigned int omp_version,
118 const char *runtime_version) {
119 ompt_start_tool_result_t *ret = NULL;
121 ompt_start_tool_t start_tool =
122 (ompt_start_tool_t)dlsym(RTLD_DEFAULT,
"ompt_start_tool");
124 ret = start_tool(omp_version, runtime_version);
// Weak-attribute variant: the runtime defines its own ompt_start_tool marked
// OMPT_WEAK_ATTRIBUTE. Its body looks up the next "ompt_start_tool" definition
// with dlsym(RTLD_NEXT, ...) and forwards both arguments to it, storing the
// answer in ret (initially NULL).
// NOTE(review): lines between 143 and 145 and after 145 are missing from this
// view; confirm that next_tool is NULL-checked and that ret is returned.
129 #elif OMPT_HAVE_WEAK_ATTRIBUTE 135 _OMP_EXTERN OMPT_WEAK_ATTRIBUTE ompt_start_tool_result_t *
136 ompt_start_tool(
unsigned int omp_version,
const char *runtime_version) {
137 ompt_start_tool_result_t *ret = NULL;
142 ompt_start_tool_t next_tool =
143 (ompt_start_tool_t)dlsym(RTLD_NEXT,
"ompt_start_tool");
145 ret = next_tool(omp_version, runtime_version);
// PSAPI variant: enumerates the modules loaded in the current process and asks
// each one for the symbol "ompt_start_tool" via GetProcAddress. The first hit
// is called with (omp_version, runtime_version) and its result is returned.
// The module list starts with room for NUM_MODULES handles and is grown to
// the byte count reported by EnumProcessModules when that is not enough.
// NOTE(review): the malloc result is used without a visible NULL check.
// NOTE(review): realloc assigns straight back to modules, so the old buffer
// would be lost if realloc fails; no free(modules) is visible in this view,
// confirm against the full file.
// NOTE(review): needed is a DWORD but is printed with %d.
// NOTE(review): new_size is computed before the second EnumProcessModules
// call and is not visibly refreshed afterwards.
150 #elif OMPT_HAVE_PSAPI 158 #pragma comment(lib, "psapi.lib") 161 #define NUM_MODULES 128 163 static ompt_start_tool_result_t *
164 ompt_tool_windows(
unsigned int omp_version,
const char *runtime_version) {
166 DWORD needed, new_size;
168 HANDLE process = GetCurrentProcess();
169 modules = (HMODULE *)malloc(NUM_MODULES *
sizeof(HMODULE));
170 ompt_start_tool_t ompt_tool_p = NULL;
173 printf(
"ompt_tool_windows(): looking for ompt_start_tool\n");
// First attempt with the fixed-size buffer; needed receives the byte count
// required for the full module list.
175 if (!EnumProcessModules(process, modules, NUM_MODULES *
sizeof(HMODULE),
// Convert the required byte count into a number of module handles.
182 new_size = needed /
sizeof(HMODULE);
183 if (new_size > NUM_MODULES) {
185 printf(
"ompt_tool_windows(): resize buffer to %d bytes\n", needed);
187 modules = (HMODULE *)realloc(modules, needed);
189 if (!EnumProcessModules(process, modules, needed, &needed)) {
// Probe every module for the tool entry point.
194 for (i = 0; i < new_size; ++i) {
195 (FARPROC &)ompt_tool_p = GetProcAddress(modules[i],
"ompt_start_tool");
198 TCHAR modName[MAX_PATH];
199 if (GetModuleFileName(modules[i], modName, MAX_PATH))
200 printf(
"ompt_tool_windows(): ompt_start_tool found in module %s\n",
204 return (*ompt_tool_p)(omp_version, runtime_version);
208 TCHAR modName[MAX_PATH];
209 if (GetModuleFileName(modules[i], modName, MAX_PATH))
210 printf(
"ompt_tool_windows(): ompt_start_tool not found in module %s\n",
// ompt_try_start_tool: obtains a tool start result in two stages.
//  1. Calls the platform-specific finder (ompt_tool_darwin, ompt_start_tool or
//     ompt_tool_windows, selected by preprocessor branches).
//  2. Reads OMP_TOOL_LIBRARIES, copies it with __kmp_str_format, splits the
//     copy on sep (";" or ":" depending on a preprocessor branch not visible
//     here) and, for each file name, loads the library (dlopen or
//     LoadLibrary) and looks up "ompt_start_tool" in it. A non-NULL result
//     from that routine is stored in ret.
// The copied string is released with __kmp_str_free.
// NOTE(review): the conditions linking stage 1 to stage 2, the loop header,
// the declaration of buf and the return statement are missing from this view.
219 #error Activation of OMPT is not supported on this platform. 222 static ompt_start_tool_result_t *
223 ompt_try_start_tool(
unsigned int omp_version,
const char *runtime_version) {
224 ompt_start_tool_result_t *ret = NULL;
225 ompt_start_tool_t start_tool = NULL;
228 const char *sep =
";";
230 const char *sep =
":";
// Stage 1: platform-specific lookup.
235 ret = ompt_tool_darwin(omp_version, runtime_version);
236 #elif OMPT_HAVE_WEAK_ATTRIBUTE 237 ret = ompt_start_tool(omp_version, runtime_version);
238 #elif OMPT_HAVE_PSAPI 239 ret = ompt_tool_windows(omp_version, runtime_version);
241 #error Activation of OMPT is not supported on this platform. 247 const char *tool_libs = getenv(
"OMP_TOOL_LIBRARIES");
// Stage 2: work on a private copy because tokenizing takes a mutable string.
249 char *libs = __kmp_str_format(
"%s", tool_libs);
251 char *fname = __kmp_str_token(libs, sep, &buf);
254 void *h = dlopen(fname, RTLD_LAZY);
256 start_tool = (ompt_start_tool_t)dlsym(h,
"ompt_start_tool");
258 HMODULE h = LoadLibrary(fname);
260 start_tool = (ompt_start_tool_t)GetProcAddress(h,
"ompt_start_tool");
262 #error Activation of OMPT is not supported on this platform. 264 if (start_tool && (ret = (*start_tool)(omp_version, runtime_version)))
// Advance to the next library name in the list.
267 fname = __kmp_str_token(NULL, sep, &buf);
269 __kmp_str_free(&libs);
// First-stage OMPT start-up, guarded by a function-local flag so that the
// flag is set on the first call only.
// Reads OMP_TOOL and classifies it: unset or empty -> omp_tool_unset,
// "disabled" -> omp_tool_disabled, "enabled" -> omp_tool_enabled (both
// compared through OMPT_STR_MATCH), anything else stays omp_tool_error.
// In the enabled case the tool is looked up with ompt_try_start_tool() and
// ompt_enabled is zeroed; the error case prints a warning to stderr.
// NOTE(review): several case labels, break statements and early returns are
// missing from this view, so the exact case grouping cannot be confirmed.
// NOTE(review): the final printf passes the whole ompt_enabled object to a %d
// conversion; a single integer member is presumably intended.
274 void ompt_pre_init() {
278 static int ompt_pre_initialized = 0;
280 if (ompt_pre_initialized)
283 ompt_pre_initialized = 1;
288 const char *ompt_env_var = getenv(
"OMP_TOOL");
// Start from the error classification; the chain below overrides it for
// every recognized value.
289 tool_setting_e tool_setting = omp_tool_error;
291 if (!ompt_env_var || !strcmp(ompt_env_var,
""))
292 tool_setting = omp_tool_unset;
293 else if (OMPT_STR_MATCH(ompt_env_var,
"disabled"))
294 tool_setting = omp_tool_disabled;
295 else if (OMPT_STR_MATCH(ompt_env_var,
"enabled"))
296 tool_setting = omp_tool_enabled;
299 printf(
"ompt_pre_init(): tool_setting = %d\n", tool_setting);
301 switch (tool_setting) {
302 case omp_tool_disabled:
306 case omp_tool_enabled:
311 ompt_start_tool_result =
312 ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version());
314 memset(&ompt_enabled, 0,
sizeof(ompt_enabled));
318 fprintf(stderr,
"Warning: OMP_TOOL has invalid value \"%s\".\n" 319 " legal values are (NULL,\"\",\"disabled\"," 325 printf(
"ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
// Declared here with C linkage; ompt_post_init() passes its return value to
// the initialize hook of the tool.
329 extern "C" int omp_get_initial_device(
void);
331 void ompt_post_init() {
335 static int ompt_post_initialized = 0;
337 if (ompt_post_initialized)
340 ompt_post_initialized = 1;
345 if (ompt_start_tool_result) {
346 ompt_enabled.enabled = !!ompt_start_tool_result->initialize(
347 ompt_fn_lookup, omp_get_initial_device(), &(ompt_start_tool_result->tool_data));
349 if (!ompt_enabled.enabled) {
351 memset(&ompt_enabled, 0,
sizeof(ompt_enabled));
355 kmp_info_t *root_thread = ompt_get_thread();
357 ompt_set_thread_state(root_thread, ompt_state_overhead);
359 if (ompt_enabled.ompt_callback_thread_begin) {
360 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
361 ompt_thread_initial, __ompt_get_thread_data_internal());
363 ompt_data_t *task_data;
364 ompt_data_t *parallel_data;
365 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
366 if (ompt_enabled.ompt_callback_implicit_task) {
367 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
368 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
371 ompt_set_thread_state(root_thread, ompt_state_work_serial);
376 if (ompt_enabled.enabled) {
377 ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data));
380 memset(&ompt_enabled, 0,
sizeof(ompt_enabled));
// Steps through ompt_state_info: finds the entry whose state_id equals
// current_state and writes the id and name of the following entry to
// *next_state and *next_state_name.
// The loop stops one short of the table end because it reads entry i + 1.
// NOTE(review): the declaration of i and all return statements are missing
// from this view.
391 OMPT_API_ROUTINE
int ompt_enumerate_states(
int current_state,
int *next_state,
392 const char **next_state_name) {
// Number of entries in the table, computed once from the array size.
393 const static int len =
sizeof(ompt_state_info) /
sizeof(ompt_state_info_t);
396 for (i = 0; i < len - 1; i++) {
397 if (ompt_state_info[i].state_id == current_state) {
398 *next_state = ompt_state_info[i + 1].state_id;
399 *next_state_name = ompt_state_info[i + 1].state_name;
// Steps through kmp_mutex_impl_info: entries whose id differs from
// current_impl are tested with "!="; for the entry reached after that test the
// id and name of the following entry are written to *next_impl and
// *next_impl_name.
// NOTE(review): one parameter line, the declaration of i, the statement
// controlled by the "!=" test and all return statements are missing from this
// view.
407 OMPT_API_ROUTINE
int ompt_enumerate_mutex_impls(
int current_impl,
409 const char **next_impl_name) {
// Number of entries in the table, computed once from the array size.
410 const static int len =
411 sizeof(kmp_mutex_impl_info) /
sizeof(kmp_mutex_impl_info_t);
413 for (i = 0; i < len - 1; i++) {
414 if (kmp_mutex_impl_info[i].
id != current_impl)
416 *next_impl = kmp_mutex_impl_info[i + 1].id;
417 *next_impl_name = kmp_mutex_impl_info[i + 1].name;
// Registers callback for the event selected by which.
// The per-event code is generated by expanding FOREACH_OMPT_EVENT with
// ompt_event_macro: when ompt_event_implementation_status(event_name) is
// non-zero, the pointer is stored in ompt_callbacks and the matching flag in
// ompt_enabled is set to (callback != 0).
// Falls through to ompt_set_error when no generated case returns.
// NOTE(review): the switch/case scaffolding around the macro is missing from
// this view.
427 OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which,
428 ompt_callback_t callback) {
431 #define ompt_event_macro(event_name, callback_type, event_id) \ 433 if (ompt_event_implementation_status(event_name)) { \ 434 ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ 435 ompt_enabled.event_name = (callback != 0); \ 438 return ompt_event_implementation_status(event_name); \ 440 return ompt_set_always; 442 FOREACH_OMPT_EVENT(ompt_event_macro)
444 #undef ompt_event_macro 447 return ompt_set_error;
// Queries the callback registered for the event selected by which.
// Returns ompt_get_callback_failure immediately when OMPT is not enabled.
// The per-event code is generated by expanding FOREACH_OMPT_EVENT: for an
// implemented event it reads the stored pointer and reports
// ompt_get_callback_success only when the event flag in ompt_enabled is set
// and the pointer is non-null; every other path reports failure.
// NOTE(review): the store through *callback and the switch/case scaffolding
// are missing from this view.
451 OMPT_API_ROUTINE
int ompt_get_callback(ompt_callbacks_t which,
452 ompt_callback_t *callback) {
453 if (!ompt_enabled.enabled)
454 return ompt_get_callback_failure;
458 #define ompt_event_macro(event_name, callback_type, event_id) \ 460 if (ompt_event_implementation_status(event_name)) { \ 461 ompt_callback_t mycb = \ 462 (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ 463 if (ompt_enabled.event_name && mycb) { \ 465 return ompt_get_callback_success; \ 468 return ompt_get_callback_failure; 470 FOREACH_OMPT_EVENT(ompt_event_macro)
472 #undef ompt_event_macro 475 return ompt_get_callback_failure;
// Tool-facing wrapper: checks ompt_enabled.enabled first, then forwards
// ancestor_level and parallel_data to __ompt_get_parallel_info_internal().
// NOTE(review): the last parameter, the value returned when OMPT is disabled
// and the tail of the forwarded call are missing from this view.
483 OMPT_API_ROUTINE
int ompt_get_parallel_info(
int ancestor_level,
484 ompt_data_t **parallel_data,
486 if (!ompt_enabled.enabled)
488 return __ompt_get_parallel_info_internal(ancestor_level, parallel_data,
// Reports the state of the calling thread.
// With OMPT disabled the answer is always ompt_state_work_serial. Otherwise
// the state comes from __ompt_get_state_internal(wait_id), and an undefined
// state is replaced by ompt_state_work_serial.
// NOTE(review): the final return statement is missing from this view.
492 OMPT_API_ROUTINE
int ompt_get_state(ompt_wait_id_t *wait_id) {
493 if (!ompt_enabled.enabled)
494 return ompt_state_work_serial;
495 int thread_state = __ompt_get_state_internal(wait_id);
497 if (thread_state == ompt_state_undefined) {
498 thread_state = ompt_state_work_serial;
// Tool-facing wrapper: checks ompt_enabled.enabled first, then returns the
// pointer produced by __ompt_get_thread_data_internal().
// NOTE(review): the value returned when OMPT is disabled is missing from this
// view.
508 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(
void) {
509 if (!ompt_enabled.enabled)
511 return __ompt_get_thread_data_internal();
// Tool-facing wrapper: checks ompt_enabled.enabled first, then forwards all
// arguments unchanged to __ompt_get_task_info_internal().
// NOTE(review): the thread_num parameter line and the value returned when
// OMPT is disabled are missing from this view.
514 OMPT_API_ROUTINE
int ompt_get_task_info(
int ancestor_level,
int *type,
515 ompt_data_t **task_data,
516 ompt_frame_t **task_frame,
517 ompt_data_t **parallel_data,
519 if (!ompt_enabled.enabled)
521 return __ompt_get_task_info_internal(ancestor_level, type, task_data,
522 task_frame, parallel_data, thread_num);
// Tool-facing wrapper that forwards addr, size and block to
// __ompt_get_task_memory_internal() and returns its result.
// NOTE(review): the block parameter line is missing from this view, and no
// ompt_enabled check is visible here.
525 OMPT_API_ROUTINE
int ompt_get_task_memory(
void **addr,
size_t *size,
527 return __ompt_get_task_memory_internal(addr, size, block);
// Returns the runtime global __kmp_avail_proc.
// NOTE(review): presumably the number of processors available to the
// runtime; confirm where __kmp_avail_proc is defined.
534 OMPT_API_ROUTINE
int ompt_get_num_procs(
void) {
537 return __kmp_avail_proc;
// Returns __kmp_affinity_num_masks on the path where affinity is supported
// and KMP_AFFINITY_CAPABLE() holds.
// NOTE(review): the values returned in the unsupported and not-capable
// branches are missing from this view.
544 OMPT_API_ROUTINE
int ompt_get_num_places(
void) {
546 #if !KMP_AFFINITY_SUPPORTED 549 if (!KMP_AFFINITY_CAPABLE())
551 return __kmp_affinity_num_masks;
// Collects the processor ids that belong to place place_num.
// place_num must lie in [0, __kmp_affinity_num_masks). The mask of the place
// is iterated with KMP_CPU_SET_ITERATE; processors that are not set in both
// __kmp_affin_fullMask and the place mask are skipped. Entries are staged in
// tmp_ids while count < ids_size, and a final loop over count entries runs
// only when ids_size >= count.
// NOTE(review): tmp_ids is a variable-length array sized by the caller value
// ids_size; a zero or negative size is undefined, and VLAs are a compiler
// extension in C++. Consider filling the output directly after counting.
// NOTE(review): the output parameter line, the loop bodies and all return
// statements are missing from this view.
555 OMPT_API_ROUTINE
int ompt_get_place_proc_ids(
int place_num,
int ids_size,
558 #if !KMP_AFFINITY_SUPPORTED 562 int tmp_ids[ids_size];
563 if (!KMP_AFFINITY_CAPABLE())
565 if (place_num < 0 || place_num >= (
int)__kmp_affinity_num_masks)
569 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
571 KMP_CPU_SET_ITERATE(i, mask) {
572 if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
573 (!KMP_CPU_ISSET(i, mask))) {
// Stage an id only while it still fits in the caller-sized buffer.
576 if (count < ids_size)
580 if (ids_size >= count) {
581 for (i = 0; i < count; i++) {
// Returns th_current_place of the calling thread.
// Guards visible here: OMPT must be enabled, __kmp_get_gtid() must be
// non-negative, KMP_AFFINITY_CAPABLE() must hold, and the thread looked up
// from __kmp_entry_gtid() must be non-NULL with a non-negative current place.
// NOTE(review): the values returned when a guard fails and the declarations
// of gtid and thread are missing from this view.
589 OMPT_API_ROUTINE
int ompt_get_place_num(
void) {
591 #if !KMP_AFFINITY_SUPPORTED 594 if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
599 if (!KMP_AFFINITY_CAPABLE())
601 gtid = __kmp_entry_gtid();
602 thread = __kmp_thread_from_gtid(gtid);
603 if (thread == NULL || thread->th.th_current_place < 0)
605 return thread->th.th_current_place;
609 OMPT_API_ROUTINE
int ompt_get_partition_place_nums(
int place_nums_size,
612 #if !KMP_AFFINITY_SUPPORTED 615 if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
618 int i, gtid, place_num, first_place, last_place, start, end;
620 if (!KMP_AFFINITY_CAPABLE())
622 gtid = __kmp_entry_gtid();
623 thread = __kmp_thread_from_gtid(gtid);
626 first_place = thread->th.th_first_place;
627 last_place = thread->th.th_last_place;
628 if (first_place < 0 || last_place < 0)
630 if (first_place <= last_place) {
637 if (end - start <= place_nums_size)
638 for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) {
639 place_nums[i] = place_num;
641 return end - start + 1;
// Reports the processor the calling thread is running on.
// Requires OMPT to be enabled and __kmp_get_gtid() to be non-negative.
// One branch returns sched_getcpu(); another calls
// GetCurrentProcessorNumberEx and returns 64 * Group + Number.
// NOTE(review): the preprocessor conditions selecting the branches, the
// declaration of pn and the value returned when the guard fails are missing
// from this view.
649 OMPT_API_ROUTINE
int ompt_get_proc_id(
void) {
650 if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
653 return sched_getcpu();
656 GetCurrentProcessorNumberEx(&pn);
657 return 64 * pn.Group + pn.Number;
// Forwards a control request to the tool.
// When OMPT is enabled and the control_tool callback flag is set, the
// registered callback is invoked with command, modifier, arg and the return
// address loaded through OMPT_LOAD_RETURN_ADDRESS for the entry gtid, and its
// result is returned.
// NOTE(review): the values returned when OMPT is disabled or no callback is
// registered are missing from this view.
680 int __kmp_control_tool(uint64_t command, uint64_t modifier,
void *arg) {
682 if (ompt_enabled.enabled) {
683 if (ompt_enabled.ompt_callback_control_tool) {
684 return ompt_callbacks.ompt_callback(ompt_callback_control_tool)(
685 command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid()));
698 OMPT_API_ROUTINE uint64_t ompt_get_unique_id(
void) {
699 return __ompt_get_unique_id_internal();
702 OMPT_API_ROUTINE
void ompt_finalize_tool(
void) { __kmp_internal_end_atexit(); }
// Tool-facing query taking three output pointers: device_num, target_id and
// host_op_id.
// NOTE(review): the body is missing from this view, so what is written to the
// outputs and what is returned cannot be stated here.
708 OMPT_API_ROUTINE
int ompt_get_target_info(uint64_t *device_num,
709 ompt_id_t *target_id,
710 ompt_id_t *host_op_id) {
// Tool-facing query with no arguments.
// NOTE(review): the body is missing from this view, so the returned value
// cannot be stated here.
714 OMPT_API_ROUTINE
int ompt_get_num_devices(
void) {
// Maps an inquiry-function name to its entry point.
// FOREACH_OMPT_INQUIRY_FN expands ompt_interface_fn once per function: each
// expansion binds the function to a local of type fn##_t (so a signature
// mismatch fails to compile) and returns it, cast to ompt_interface_fn_t,
// when s equals the function name exactly (strcmp).
// Returns a null function pointer when no name matches.
722 static ompt_interface_fn_t ompt_fn_lookup(
const char *s) {
724 #define ompt_interface_fn(fn) \ 725 fn##_t fn##_f = fn; \ 726 if (strcmp(s, #fn) == 0) \ 727 return (ompt_interface_fn_t)fn##_f; 729 FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
731 return (ompt_interface_fn_t)0;