LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
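    // Illustration (comment added, not part of the original header): a mask is
    // walked with the begin()/next()/end() triple, e.g.
    //   for (int i = m->begin(); i != m->end(); i = m->next(i)) { ... }
    // hwloc_bitmap_first()/hwloc_bitmap_next() return -1 once no set bit
    // remains, which is why end() is simply -1 for the hwloc mask.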
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change;
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
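    // For illustration (comment added, not part of the original header): with
    // mask_t being an 8-bit unsigned char, processor i = 11 lives in byte
    // 11 / 8 = 1 at bit 11 % 8 = 3, so set(11) performs mask[1] |= 0x08;
    // clear() and is_set() address the same byte and bit.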
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
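// Illustrative usage sketch (comment added, not part of the original header).
// Assuming the runtime keeps the selected implementation behind a dispatch
// pointer (e.g. __kmp_affinity_dispatch, declared elsewhere in the runtime),
// pinning the calling thread to one OS processor looks roughly like:
//
//   KMPAffinity::Mask *m = __kmp_affinity_dispatch->allocate_mask();
//   m->zero();
//   m->set(proc);                            // mark a single processor
//   m->set_system_affinity(/*abort_on_error=*/TRUE);
//   __kmp_affinity_dispatch->deallocate_mask(m);
//
// which is essentially what the bind_thread() implementations above do via
// the KMP_CPU_* macros.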

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
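// For illustration (comment added, not part of the original header): an
// Address records one position in the topology from the outermost level
// inwards. On a hypothetical 2-package, 4-core, 2-thread machine a hardware
// thread could carry depth = 3 and labels = {1, 3, 0}, i.e. package 1, core 3
// within that package, thread 0 on that core. isClose(b, 1) then asks whether
// two addresses agree on everything above the innermost level, i.e. whether
// they sit on the same core.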

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

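// qsort() comparator: orders AddrUnsPair records lexicographically by their
// topology labels, so that after sorting, threads sharing the outer levels of
// the hierarchy (same package, then same core, ...) end up adjacent. Both
// addresses must have the same depth.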
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  kmp_uint32 maxLevels; // current capacity of the per-level arrays below

  kmp_uint32 depth; // number of significant levels in the hierarchy
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
661  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
662  int hier_depth = adr2os[0].first.depth;
663  int level = 0;
664  for (int i = hier_depth - 1; i >= 0; --i) {
665  int max = -1;
666  for (int j = 0; j < num_addrs; ++j) {
667  int next = adr2os[j].first.childNums[i];
668  if (next > max)
669  max = next;
670  }
671  numPerLevel[level] = max + 1;
672  ++level;
673  }
674  }
675 
676  hierarchy_info()
677  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
678 
679  void fini() {
680  if (!uninitialized && numPerLevel) {
681  __kmp_free(numPerLevel);
682  numPerLevel = NULL;
683  uninitialized = not_initialized;
684  }
685  }
686 
687  void init(AddrUnsPair *adr2os, int num_addrs) {
688  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
689  &uninitialized, not_initialized, initializing);
690  if (bool_result == 0) { // Wait for initialization
691  while (TCR_1(uninitialized) != initialized)
692  KMP_CPU_PAUSE();
693  return;
694  }
695  KMP_DEBUG_ASSERT(bool_result == 1);
696 
697  /* Added explicit initialization of the data fields here to prevent usage of
698  dirty value observed when static library is re-initialized multiple times
699  (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
700  OpenMP). */
701  depth = 1;
702  resizing = 0;
703  maxLevels = 7;
704  numPerLevel =
705  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
706  skipPerLevel = &(numPerLevel[maxLevels]);
707  for (kmp_uint32 i = 0; i < maxLevels;
708  ++i) { // init numPerLevel[*] to 1 item per level
709  numPerLevel[i] = 1;
710  skipPerLevel[i] = 1;
711  }
712 
713  // Sort table by physical ID
714  if (adr2os) {
715  qsort(adr2os, num_addrs, sizeof(*adr2os),
716  __kmp_affinity_cmp_Address_labels);
717  deriveLevels(adr2os, num_addrs);
718  } else {
719  numPerLevel[0] = maxLeaves;
720  numPerLevel[1] = num_addrs / maxLeaves;
721  if (num_addrs % maxLeaves)
722  numPerLevel[1]++;
723  }
724 
725  base_num_threads = num_addrs;
726  for (int i = maxLevels - 1; i >= 0;
727  --i) // count non-empty levels to get depth
728  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
729  depth++;
730 
731  kmp_uint32 branch = minBranch;
732  if (numPerLevel[0] == 1)
733  branch = num_addrs / maxLeaves;
734  if (branch < minBranch)
735  branch = minBranch;
736  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
737  while (numPerLevel[d] > branch ||
738  (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
739  if (numPerLevel[d] & 1)
740  numPerLevel[d]++;
741  numPerLevel[d] = numPerLevel[d] >> 1;
742  if (numPerLevel[d + 1] == 1)
743  depth++;
744  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
745  }
746  if (numPerLevel[0] == 1) {
747  branch = branch >> 1;
748  if (branch < 4)
749  branch = minBranch;
750  }
751  }
752 
753  for (kmp_uint32 i = 1; i < depth; ++i)
754  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
755  // Fill in hierarchy in the case of oversubscription
756  for (kmp_uint32 i = depth; i < maxLevels; ++i)
757  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
758 
759  uninitialized = initialized; // One writer
760  }
761 
762  // Resize the hierarchy if nproc changes to something larger than before
763  void resize(kmp_uint32 nproc) {
764  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
765  while (bool_result == 0) { // someone else is trying to resize
766  KMP_CPU_PAUSE();
767  if (nproc <= base_num_threads) // happy with other thread's resize
768  return;
769  else // try to resize
770  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
771  }
772  KMP_DEBUG_ASSERT(bool_result != 0);
773  if (nproc <= base_num_threads)
774  return; // happy with other thread's resize
775 
776  // Calculate new maxLevels
777  kmp_uint32 old_sz = skipPerLevel[depth - 1];
778  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
779  // First see if old maxLevels is enough to contain new size
780  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
781  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
782  numPerLevel[i - 1] *= 2;
783  old_sz *= 2;
784  depth++;
785  }
786  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
787  while (nproc > old_sz) {
788  old_sz *= 2;
789  incs++;
790  depth++;
791  }
792  maxLevels += incs;
793 
794  // Resize arrays
795  kmp_uint32 *old_numPerLevel = numPerLevel;
796  kmp_uint32 *old_skipPerLevel = skipPerLevel;
797  numPerLevel = skipPerLevel = NULL;
798  numPerLevel =
799  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
800  skipPerLevel = &(numPerLevel[maxLevels]);
801 
802  // Copy old elements from old arrays
803  for (kmp_uint32 i = 0; i < old_maxLevels;
804  ++i) { // init numPerLevel[*] to 1 item per level
805  numPerLevel[i] = old_numPerLevel[i];
806  skipPerLevel[i] = old_skipPerLevel[i];
807  }
808 
809  // Init new elements in arrays to 1
810  for (kmp_uint32 i = old_maxLevels; i < maxLevels;
811  ++i) { // init numPerLevel[*] to 1 item per level
812  numPerLevel[i] = 1;
813  skipPerLevel[i] = 1;
814  }
815 
816  // Free old arrays
817  __kmp_free(old_numPerLevel);
818  }
819 
820  // Fill in oversubscription levels of hierarchy
821  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
822  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
823 
824  base_num_threads = nproc;
825  resizing = 0; // One writer
826  }
827 };
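// For illustration (example added, not in the original header): continuing the
// 8-thread example above, a later call to resize(20) uses the spare levels
// first: depth grows from 3 to 5, numPerLevel becomes {4, 2, 2, 2, 1, ...},
// and skipPerLevel[4] = 32 >= 20, so the existing arrays absorb the growth
// without reallocation. Only when nproc still exceeds the doubled capacity of
// all existing levels are the arrays reallocated with a larger maxLevels.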
#endif // KMP_AFFINITY_H