/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is constant inside any single dispatch loop, but it
   may change between parallel regions. __kmp_max_nth is the largest value
   __kmp_nth may take; 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style format specifiers for debug prints (d, u, lld, llu, ld)
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
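
// A worked example of the trip-count formula used above (and again in the
// init routines below): for a loop with *plower = 0, *pupper = 9, incr = 2,
// the iterations are 0, 2, 4, 6, 8, and the formula gives
//   t = (u - l) / i + 1 = (9 - 0) / 2 + 1 = 5.
// For incr = -2 with *plower = 9, *pupper = 0 the mirrored branch yields
//   t = (l - u) / (-i) + 1 = (9 - 0) / 2 + 1 = 5.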

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // Warn once per process if the ident flags carry none of the
        // expected workshare bits (outdated compiler).
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif
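
// Note on the OMPT work callbacks below: the last numeric argument is the
// iteration count reported to the tool. This routine reports 0 for a
// zero-trip loop, *pstride (the whole span) for serialized or single-thread
// teams, and trip_count otherwise.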

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, and those
  // cannot be seen here, since this code path is executed only for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: the first trip_count threads get one
      // iteration each, the rest get an empty range.
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: distribute the extras (trip_count % nth) one per thread
        // to the lowest-numbered threads.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: each thread takes a ceil(trip_count / nth)-sized block;
        // blocks that run past the original upper bound are clipped.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
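  // Worked example for kmp_sch_static (balanced): trip_count = 10, nth = 4
  // gives small_chunk = 2, extras = 2, so with *plower = 0 and incr = 1 the
  // threads receive tid 0 -> [0,2], tid 1 -> [3,5], tid 2 -> [6,7],
  // tid 3 -> [8,9]; only tid 3 sees *plastiter = TRUE.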
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
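  // Worked example for kmp_sch_static_chunked: chunk = 2, nth = 3,
  // *plower = 0, incr = 1 gives span = 2 and *pstride = 6, so tid 1 starts
  // at [2,3] and the generated loop advances by *pstride to [8,9], [14,15],
  // and so on. The thread owning the last chunk is
  // ((trip_count - 1) / chunk) % nth, as tested above.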
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment: round span up to a multiple of chunk
    // (the bitmask trick is exact when chunk is a power of two)
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
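  // Worked example for the chunk adjustment above: trip_count = 26, nth = 4,
  // chunk = 4 gives span = ceil(26 / 4) = 7, then
  // chunk = (7 + 3) & ~3 = 8, so every thread gets one 8-iteration block and
  // the final block is clipped to old_upper.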
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // Only the primary threads of some teams get a single iteration; all
    // other threads get nothing.
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
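    // Worked example for the balanced team split above: trip_count = 10,
    // nteams = 4 gives chunkD = 2, extras = 2, so with *plower = 0 and
    // incr = 1 the teams receive team 0 -> [0,2], team 1 -> [3,5],
    // team 2 -> [6,7], team 3 -> [8,9]; each team then subdivides its
    // [*plower, *pupperDist] range among its threads below.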
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
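  // Worked example: chunk = 4, nteams = 3, lower = 0, incr = 1 gives
  // span = 4 and *p_st = 12, so team 1 gets its first chunk [4,7] and the
  // generated distribute loop advances by *p_st to [16,19], [28,31], ...
  // *p_last is set for the team that owns the last chunk, i.e. chunk index
  // ((trip_count - 1) / chunk) % nteams.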
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
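
// Illustrative lowering (a sketch, not verbatim compiler output): for a
// thread in a parallel region executing
//   #pragma omp for schedule(static)
//   for (int i = lo; i <= hi; ++i) body(i);
// the compiler emits roughly
//   kmp_int32 last = 0, lower = lo, upper = hi, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lower; i <= upper; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);
// where body, lo, and hi are placeholders.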

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

//------------------------------------------------------------------------------
// Auxiliary routines for the Distribute Parallel Loop construct
// implementation. Each wrapper forwards the call to the template
//   __kmp_team_static_init(ident_t *loc, int gtid,
//                          int *p_last, T *lb, T *ub, ST *st, ST incr,
//                          ST chunk)

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

} // extern "C"