LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for the 8-byte complex (kmp_cmplx32) type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is to write the OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembly language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  lhs.q += rhs.q;
611 }
612 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  lhs.q -= rhs.q;
614 }
615 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  lhs.q *= rhs.q;
617 }
618 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  lhs.q /= rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  lhs.q += rhs.q;
630 }
631 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  lhs.q -= rhs.q;
633 }
634 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  lhs.q *= rhs.q;
636 }
637 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  lhs.q /= rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
648  lhs.q += rhs.q;
649 }
650 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
651  lhs.q -= rhs.q;
652 }
653 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
654  lhs.q *= rhs.q;
655 }
656 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
657  lhs.q /= rhs.q;
658 }
659 
660 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
661  kmp_cmplx128_a16_t &rhs) {
662  lhs.q += rhs.q;
663 }
664 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  lhs.q -= rhs.q;
667 }
668 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  lhs.q *= rhs.q;
671 }
672 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  lhs.q /= rhs.q;
675 }
676 
677 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
678 
679 // ATOMIC implementation routines -----------------------------------------
680 // One routine for each operation and operand type.
681 // All routine declarations look like
682 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
683 
684 #define KMP_CHECK_GTID \
685  if (gtid == KMP_GTID_UNKNOWN) { \
686  gtid = __kmp_entry_gtid(); \
687  } // check and get gtid when needed
688 
689 // Beginning of a definition (provides name, parameters, debug trace)
690 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
691 // fixed-size integers)
692 // OP_ID - operation identifier (add, sub, mul, ...)
693 // TYPE - operands' type
694 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
695  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
696  TYPE *lhs, TYPE rhs) { \
697  KMP_DEBUG_ASSERT(__kmp_init_serial); \
698  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
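// Illustrative sketch (not part of the build): ATOMIC_BEGIN(fixed4, add, kmp_int32, void)
// opens a definition of the form
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// and the macros below supply the body and the closing brace.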
699 
700 // ------------------------------------------------------------------------
701 // Lock variables used for critical sections for various size operands
702 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
703 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
704 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
705 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
706 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
707 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
708 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
709 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
710 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
711 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
712 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
713 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
714 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
715 
716 // ------------------------------------------------------------------------
717 // Operation on *lhs, rhs bound by critical section
718 // OP - operator (it's supposed to contain an assignment)
719 // LCK_ID - lock identifier
720 // Note: don't check gtid as it should always be valid
721 // 1- and 2-byte operands expect a valid gtid; other sizes check it before this macro
722 #define OP_CRITICAL(OP, LCK_ID) \
723  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
724  \
725  (*lhs) OP(rhs); \
726  \
727  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
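// Illustrative sketch (not part of the build): OP_CRITICAL(+=, 10r), as used for the
// long double routines further below, expands to a plain lock-protected update:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);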
728 
729 // ------------------------------------------------------------------------
730 // For GNU compatibility, we may need to use a critical section,
731 // even though it is not required by the ISA.
732 //
733 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
734 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
735 // critical section. On Intel(R) 64, all atomic operations are done with fetch
736 // and add or compare and exchange. Therefore, the FLAG parameter to this
737 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
738 // require a critical section, where we predict that they will be implemented
739 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
740 //
741 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
742 // the FLAG parameter should always be 1. If we know that we will be using
743 // a critical section, then we want to make certain that we use the generic
744 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
745 // locks that are specialized based upon the size or type of the data.
746 //
747 // If FLAG is 0, then we are relying on dead code elimination by the build
748 // compiler to get rid of the useless block of code, and save a needless
749 // branch at runtime.
750 
751 #ifdef KMP_GOMP_COMPAT
752 #define OP_GOMP_CRITICAL(OP, FLAG) \
753  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
754  KMP_CHECK_GTID; \
755  OP_CRITICAL(OP, 0); \
756  return; \
757  }
758 #else
759 #define OP_GOMP_CRITICAL(OP, FLAG)
760 #endif /* KMP_GOMP_COMPAT */
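// Illustrative sketch (not part of the build): with KMP_GOMP_COMPAT defined,
// OP_GOMP_CRITICAL(+=, 1) inserts an early-out of the form
//   if ((1) && (__kmp_atomic_mode == 2)) {
//     KMP_CHECK_GTID;
//     OP_CRITICAL(+=, 0); // i.e. serialize through the generic __kmp_atomic_lock
//     return;
//   }
// so a GOMP-compatible run takes the single generic lock, while Intel mode
// (__kmp_atomic_mode == 1) falls through to the lock-free code that follows.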
761 
762 #if KMP_MIC
763 #define KMP_DO_PAUSE _mm_delay_32(1)
764 #else
765 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
766 #endif /* KMP_MIC */
767 
768 // ------------------------------------------------------------------------
769 // Operation on *lhs, rhs using "compare_and_store" routine
770 // TYPE - operands' type
771 // BITS - size in bits, used to distinguish low level calls
772 // OP - operator
773 #define OP_CMPXCHG(TYPE, BITS, OP) \
774  { \
775  TYPE old_value, new_value; \
776  old_value = *(TYPE volatile *)lhs; \
777  new_value = old_value OP rhs; \
778  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
779  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
780  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
781  KMP_DO_PAUSE; \
782  \
783  old_value = *(TYPE volatile *)lhs; \
784  new_value = old_value OP rhs; \
785  } \
786  }
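// Illustrative sketch (not part of the build): OP_CMPXCHG(kmp_real32, 32, +), as used by
// __kmpc_atomic_float4_add, is roughly the following compare-and-swap retry loop:
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs; // another thread won the race; retry
//     new_value = old_value + rhs;
//   }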
787 
788 #if USE_CMPXCHG_FIX
789 // 2007-06-25:
790 // Workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
791 // and win_32e are affected (I verified the asm). The compiler ignores the volatile
792 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a compiler problem;
793 // the related tracker is C76005, targeted to 11.0. I verified the asm of
794 // the workaround.
795 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
796  { \
797  struct _sss { \
798  TYPE cmp; \
799  kmp_int##BITS *vvv; \
800  }; \
801  struct _sss old_value, new_value; \
802  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
803  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
804  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
805  new_value.cmp = old_value.cmp OP rhs; \
806  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
807  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
808  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
809  KMP_DO_PAUSE; \
810  \
811  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
812  new_value.cmp = old_value.cmp OP rhs; \
813  } \
814  }
815 // end of the first part of the workaround for C78287
816 #endif // USE_CMPXCHG_FIX
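// Note on the workaround above (my reading, not verified against the affected
// compilers): funnelling every load and both CAS operands through the kmp_int##BITS
// pointer vvv, instead of touching the TYPE member directly, is meant to force a real
// BITS-wide memory access and so sidestep the optimization that ignored the volatile
// qualifier in OP_CMPXCHG for complex(kind=4).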
817 
818 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
819 
820 // ------------------------------------------------------------------------
821 // X86 or X86_64: no alignment problems ====================================
822 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
823  GOMP_FLAG) \
824  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
825  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
826  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
827  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
828  }
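// Illustrative sketch (not part of the build): ATOMIC_FIXED_ADD(fixed4, add, kmp_int32,
// 32, +, 4i, 3, 0) below boils down to
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     ... debug assert / trace from ATOMIC_BEGIN ...
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // one fetch-and-add, no CAS retry loop
//   }
// and with a GOMP_FLAG of 0 the OP_GOMP_CRITICAL early-out is dead code.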
829 // -------------------------------------------------------------------------
830 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
831  GOMP_FLAG) \
832  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
833  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
834  OP_CMPXCHG(TYPE, BITS, OP) \
835  }
836 #if USE_CMPXCHG_FIX
837 // -------------------------------------------------------------------------
838 // workaround for C78287 (complex(kind=4) data type)
839 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
840  MASK, GOMP_FLAG) \
841  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
842  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
843  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
844  }
845 // end of the second part of the workaround for C78287
846 #endif // USE_CMPXCHG_FIX
847 
848 #else
849 // -------------------------------------------------------------------------
850 // Code for other architectures that don't handle unaligned accesses.
851 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
852  GOMP_FLAG) \
853  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
854  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
855  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
856  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
857  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
858  } else { \
859  KMP_CHECK_GTID; \
860  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
861  } \
862  }
863 // -------------------------------------------------------------------------
864 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
865  GOMP_FLAG) \
866  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
867  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
868  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
869  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
870  } else { \
871  KMP_CHECK_GTID; \
872  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
873  } \
874  }
875 #if USE_CMPXCHG_FIX
876 // -------------------------------------------------------------------------
877 // workaround for C78287 (complex(kind=4) data type)
878 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
879  MASK, GOMP_FLAG) \
880  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
881  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
882  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
883  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
884  } else { \
885  KMP_CHECK_GTID; \
886  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
887  } \
888  }
889 // end of the second part of the workaround for C78287
890 #endif // USE_CMPXCHG_FIX
891 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
892 
893 // Routines for ATOMIC 4-byte operands addition and subtraction
894 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
895  0) // __kmpc_atomic_fixed4_add
896 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
897  0) // __kmpc_atomic_fixed4_sub
898 
899 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
900  KMP_ARCH_X86) // __kmpc_atomic_float4_add
901 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
902  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
903 
904 // Routines for ATOMIC 8-byte operands addition and subtraction
905 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
906  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
907 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
908  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
909 
910 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
911  KMP_ARCH_X86) // __kmpc_atomic_float8_add
912 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
913  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
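// Usage sketch (assumption, for illustration only -- loc/x/expr are placeholders):
// for a directive such as
//   #pragma omp atomic
//   x += expr; // x is a double
// a compiler targeting this runtime would typically emit a call along the lines of
//   __kmpc_atomic_float8_add(&loc, gtid, &x, expr);
// instead of open-coding the update.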
914 
915 // ------------------------------------------------------------------------
916 // Entries definition for integer operands
917 // TYPE_ID - operands type and size (fixed4, float4)
918 // OP_ID - operation identifier (add, sub, mul, ...)
919 // TYPE - operand type
920 // BITS - size in bits, used to distinguish low level calls
921 // OP - operator (used in critical section)
922 // LCK_ID - lock identifier, used to possibly distinguish lock variable
923 // MASK - used for alignment check
924 
925 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
926 // ------------------------------------------------------------------------
927 // Routines for ATOMIC integer operands, other operators
928 // ------------------------------------------------------------------------
929 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
930 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
931  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
932 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
933  0) // __kmpc_atomic_fixed1_andb
934 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
935  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
936 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
937  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
938 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
939  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
940 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
941  0) // __kmpc_atomic_fixed1_orb
942 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
943  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
944 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
945  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
946 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
947  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
948 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
949  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
950 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
951  0) // __kmpc_atomic_fixed1_xor
952 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
953  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
954 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
955  0) // __kmpc_atomic_fixed2_andb
956 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
957  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
958 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
960 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
962 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
963  0) // __kmpc_atomic_fixed2_orb
964 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
965  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
966 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
967  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
968 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
969  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
970 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
971  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
972 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
973  0) // __kmpc_atomic_fixed2_xor
974 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
975  0) // __kmpc_atomic_fixed4_andb
976 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
977  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
978 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
979  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
980 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
981  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
982 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
983  0) // __kmpc_atomic_fixed4_orb
984 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
985  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
986 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
987  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
988 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
989  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
990 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
991  0) // __kmpc_atomic_fixed4_xor
992 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
993  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
994 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
995  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
996 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
997  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
998 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
999  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1000 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1001  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1002 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1003  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1004 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1005  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1006 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1007  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1008 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1009  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1010 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1011  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1012 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1013  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1014 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1015  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1016 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1017  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1018 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1019 
1020 /* ------------------------------------------------------------------------ */
1021 /* Routines for C/C++ Reduction operators && and || */
1022 
1023 // ------------------------------------------------------------------------
1024 // Need separate macros for && and || because they have no compound-assignment form
1025 // TODO: eliminate the ATOMIC_CRIT_{L,EQV} macros as they are not used
1026 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1027  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1028  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1029  OP_CRITICAL(= *lhs OP, LCK_ID) \
1030  }
1031 
1032 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1033 
1034 // ------------------------------------------------------------------------
1035 // X86 or X86_64: no alignment problems ===================================
1036 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1037  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1038  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1039  OP_CMPXCHG(TYPE, BITS, OP) \
1040  }
1041 
1042 #else
1043 // ------------------------------------------------------------------------
1044 // Code for other architectures that don't handle unaligned accesses.
1045 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1046  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1047  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1048  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1049  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1050  } else { \
1051  KMP_CHECK_GTID; \
1052  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1053  } \
1054  }
1055 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1056 
1057 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1059 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1061 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1063 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1064  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1065 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1066  0) // __kmpc_atomic_fixed4_andl
1067 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1068  0) // __kmpc_atomic_fixed4_orl
1069 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1071 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1072  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1073 
1074 /* ------------------------------------------------------------------------- */
1075 /* Routines for Fortran operators that have no C counterpart: */
1076 /* MAX, MIN, .EQV., .NEQV. */
1077 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1078 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1079 
1080 // -------------------------------------------------------------------------
1081 // MIN and MAX need separate macros
1082 // OP - operator used to check whether any action is still needed
1083 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1084  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1085  \
1086  if (*lhs OP rhs) { /* still need actions? */ \
1087  *lhs = rhs; \
1088  } \
1089  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1090 
1091 // -------------------------------------------------------------------------
1092 #ifdef KMP_GOMP_COMPAT
1093 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1094  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1095  KMP_CHECK_GTID; \
1096  MIN_MAX_CRITSECT(OP, 0); \
1097  return; \
1098  }
1099 #else
1100 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1101 #endif /* KMP_GOMP_COMPAT */
1102 
1103 // -------------------------------------------------------------------------
1104 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1105  { \
1106  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1107  TYPE old_value; \
1108  temp_val = *lhs; \
1109  old_value = temp_val; \
1110  while (old_value OP rhs && /* still need actions? */ \
1111  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1112  (kmp_int##BITS *)lhs, \
1113  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1114  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1115  KMP_CPU_PAUSE(); \
1116  temp_val = *lhs; \
1117  old_value = temp_val; \
1118  } \
1119  }
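// Illustrative sketch (not part of the build): MIN_MAX_CMPXCHG(kmp_int32, 32, <), as used
// by __kmpc_atomic_fixed4_max, only tries to install rhs while the current value still
// violates the bound:
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs && // still need to update?
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *VOLATILE_CAST(kmp_int32 *) & old_value,
//                                       *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;
//   }
// If another thread has already stored a value >= rhs, the loop exits without writing.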
1120 
1121 // -------------------------------------------------------------------------
1122 // 1-byte, 2-byte operands - use critical section
1123 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1124  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1125  if (*lhs OP rhs) { /* need actions? */ \
1126  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1127  MIN_MAX_CRITSECT(OP, LCK_ID) \
1128  } \
1129  }
1130 
1131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1132 
1133 // -------------------------------------------------------------------------
1134 // X86 or X86_64: no alignment problems ====================================
1135 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1136  GOMP_FLAG) \
1137  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1138  if (*lhs OP rhs) { \
1139  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1140  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1141  } \
1142  }
1143 
1144 #else
1145 // -------------------------------------------------------------------------
1146 // Code for other architectures that don't handle unaligned accesses.
1147 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1148  GOMP_FLAG) \
1149  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1150  if (*lhs OP rhs) { \
1151  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1152  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1153  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1154  } else { \
1155  KMP_CHECK_GTID; \
1156  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1157  } \
1158  } \
1159  }
1160 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1161 
1162 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1163  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1164 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1165  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1166 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1167  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1168 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1169  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1170 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1171  0) // __kmpc_atomic_fixed4_max
1172 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1173  0) // __kmpc_atomic_fixed4_min
1174 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1175  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1176 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1177  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1178 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1179  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1180 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1181  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1182 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1183  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1184 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1185  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1186 #if KMP_HAVE_QUAD
1187 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1188  1) // __kmpc_atomic_float16_max
1189 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1190  1) // __kmpc_atomic_float16_min
1191 #if (KMP_ARCH_X86)
1192 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1193  1) // __kmpc_atomic_float16_max_a16
1194 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1195  1) // __kmpc_atomic_float16_min_a16
1196 #endif // (KMP_ARCH_X86)
1197 #endif // KMP_HAVE_QUAD
1198 // ------------------------------------------------------------------------
1199 // Need separate macros for .EQV. because of the need for a complement (~)
1200 // OP ignored for critical sections, ^=~ used instead
1201 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1202  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1203  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1204  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
1205  }
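// Illustrative note: Fortran's a .EQV. b is the complement of exclusive-or, so the
// update is passed as "^= ~" and the critical-section body becomes (*lhs) ^= ~(rhs);
// the CAS-based ATOMIC_CMPX_EQV below feeds "^~" into OP_CMPXCHG to the same effect.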
1206 
1207 // ------------------------------------------------------------------------
1208 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1209 // ------------------------------------------------------------------------
1210 // X86 or X86_64: no alignment problems ===================================
1211 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1212  GOMP_FLAG) \
1213  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1214  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1215  OP_CMPXCHG(TYPE, BITS, OP) \
1216  }
1217 // ------------------------------------------------------------------------
1218 #else
1219 // ------------------------------------------------------------------------
1220 // Code for other architectures that don't handle unaligned accesses.
1221 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1222  GOMP_FLAG) \
1223  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1224  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
1225  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1226  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1227  } else { \
1228  KMP_CHECK_GTID; \
1229  OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
1230  } \
1231  }
1232 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1233 
1234 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1235  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1236 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1237  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1238 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1239  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1240 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1241  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1242 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1243  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1244 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1245  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1246 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1247  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1248 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1249  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1250 
1251 // ------------------------------------------------------------------------
1252 // Routines for Extended types: long double, _Quad, complex flavours (use
1253 // critical section)
1254 // TYPE_ID, OP_ID, TYPE - detailed above
1255 // OP - operator
1256 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1257 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1258  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1259  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1260  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1261  }
1262 
1263 /* ------------------------------------------------------------------------- */
1264 // routines for long double type
1265 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1266  1) // __kmpc_atomic_float10_add
1267 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1268  1) // __kmpc_atomic_float10_sub
1269 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1270  1) // __kmpc_atomic_float10_mul
1271 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1272  1) // __kmpc_atomic_float10_div
1273 #if KMP_HAVE_QUAD
1274 // routines for _Quad type
1275 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1276  1) // __kmpc_atomic_float16_add
1277 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1278  1) // __kmpc_atomic_float16_sub
1279 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1280  1) // __kmpc_atomic_float16_mul
1281 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1282  1) // __kmpc_atomic_float16_div
1283 #if (KMP_ARCH_X86)
1284 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1285  1) // __kmpc_atomic_float16_add_a16
1286 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1287  1) // __kmpc_atomic_float16_sub_a16
1288 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1289  1) // __kmpc_atomic_float16_mul_a16
1290 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1291  1) // __kmpc_atomic_float16_div_a16
1292 #endif // (KMP_ARCH_X86)
1293 #endif // KMP_HAVE_QUAD
1294 // routines for complex types
1295 
1296 #if USE_CMPXCHG_FIX
1297 // workaround for C78287 (complex(kind=4) data type)
1298 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1299  1) // __kmpc_atomic_cmplx4_add
1300 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1301  1) // __kmpc_atomic_cmplx4_sub
1302 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1303  1) // __kmpc_atomic_cmplx4_mul
1304 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1305  1) // __kmpc_atomic_cmplx4_div
1306 // end of the workaround for C78287
1307 #else
1308 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1309 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1310 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1311 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1312 #endif // USE_CMPXCHG_FIX
1313 
1314 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1315 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1316 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1317 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1318 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1319  1) // __kmpc_atomic_cmplx10_add
1320 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1321  1) // __kmpc_atomic_cmplx10_sub
1322 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1323  1) // __kmpc_atomic_cmplx10_mul
1324 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1325  1) // __kmpc_atomic_cmplx10_div
1326 #if KMP_HAVE_QUAD
1327 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1328  1) // __kmpc_atomic_cmplx16_add
1329 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1330  1) // __kmpc_atomic_cmplx16_sub
1331 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1332  1) // __kmpc_atomic_cmplx16_mul
1333 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1334  1) // __kmpc_atomic_cmplx16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1337  1) // __kmpc_atomic_cmplx16_add_a16
1338 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1339  1) // __kmpc_atomic_cmplx16_sub_a16
1340 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1341  1) // __kmpc_atomic_cmplx16_mul_a16
1342 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1343  1) // __kmpc_atomic_cmplx16_div_a16
1344 #endif // (KMP_ARCH_X86)
1345 #endif // KMP_HAVE_QUAD
1346 
1347 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1348 // Supported only on IA-32 architecture and Intel(R) 64
1349 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1350 
1351 // ------------------------------------------------------------------------
1352 // Operation on *lhs, rhs bound by critical section
1353 // OP - operator (it's supposed to contain an assignment)
1354 // LCK_ID - lock identifier
1355 // Note: don't check gtid as it should always be valid
1356 // 1- and 2-byte operands expect a valid gtid; other sizes check it before this macro
1357 #define OP_CRITICAL_REV(OP, LCK_ID) \
1358  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1359  \
1360  (*lhs) = (rhs)OP(*lhs); \
1361  \
1362  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1363 
1364 #ifdef KMP_GOMP_COMPAT
1365 #define OP_GOMP_CRITICAL_REV(OP, FLAG) \
1366  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1367  KMP_CHECK_GTID; \
1368  OP_CRITICAL_REV(OP, 0); \
1369  return; \
1370  }
1371 #else
1372 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1373 #endif /* KMP_GOMP_COMPAT */
1374 
1375 // Beginning of a definition (provides name, parameters, debug trace)
1376 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1377 // fixed-size integers)
1378 // OP_ID - operation identifier (add, sub, mul, ...)
1379 // TYPE - operands' type
1380 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1381  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1382  TYPE *lhs, TYPE rhs) { \
1383  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1384  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1385 
1386 // ------------------------------------------------------------------------
1387 // Operation on *lhs, rhs using "compare_and_store" routine
1388 // TYPE - operands' type
1389 // BITS - size in bits, used to distinguish low level calls
1390 // OP - operator
1391 // Note: temp_val introduced in order to force the compiler to read
1392 // *lhs only once (w/o it the compiler reads *lhs twice)
1393 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1394  { \
1395  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1396  TYPE old_value, new_value; \
1397  temp_val = *lhs; \
1398  old_value = temp_val; \
1399  new_value = rhs OP old_value; \
1400  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1401  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1402  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1403  KMP_DO_PAUSE; \
1404  \
1405  temp_val = *lhs; \
1406  old_value = temp_val; \
1407  new_value = rhs OP old_value; \
1408  } \
1409  }
1410 
1411 // -------------------------------------------------------------------------
1412 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1413  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1414  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1415  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1416  }
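// Usage sketch (assumption, for illustration only -- loc/x/expr are placeholders):
// the _rev entry points cover the non-commutative "x = expr binop x" form, e.g.
//   #pragma omp atomic
//   x = expr - x; // x is a kmp_int32
// would map to __kmpc_atomic_fixed4_sub_rev(&loc, gtid, &x, expr), whose CAS loop
// computes new_value = rhs - old_value rather than old_value - rhs.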
1417 
1418 // ------------------------------------------------------------------------
1419 // Entries definition for integer operands
1420 // TYPE_ID - operands type and size (fixed4, float4)
1421 // OP_ID - operation identifier (add, sub, mul, ...)
1422 // TYPE - operand type
1423 // BITS - size in bits, used to distinguish low level calls
1424 // OP - operator (used in critical section)
1425 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1426 
1427 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1428 // ------------------------------------------------------------------------
1429 // Routines for ATOMIC integer operands, other operators
1430 // ------------------------------------------------------------------------
1431 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1432 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1433  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1434 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1435  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1436 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1437  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1438 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1439  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1440 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1441  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1442 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1443  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1444 
1445 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1446  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1447 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1448  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1449 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1450  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1451 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1452  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1453 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1454  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1455 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1456  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1457 
1458 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1459  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1460 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1461  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1462 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1463  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1464 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1465  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1466 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1467  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1468 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1469  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1470 
1471 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1472  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1473 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1474  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1475 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1476  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1477 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1478  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1479 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1480  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1481 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1482  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1483 
1484 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1485  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1486 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1487  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1488 
1489 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1490  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1491 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1492  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1493 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1494 
1495 // ------------------------------------------------------------------------
1496 // Routines for Extended types: long double, _Quad, complex flavours (use
1497 // critical section)
1498 // TYPE_ID, OP_ID, TYPE - detailed above
1499 // OP - operator
1500 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1501 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1502  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1503  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1504  OP_CRITICAL_REV(OP, LCK_ID) \
1505  }
1506 
1507 /* ------------------------------------------------------------------------- */
1508 // routines for long double type
1509 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1510  1) // __kmpc_atomic_float10_sub_rev
1511 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1512  1) // __kmpc_atomic_float10_div_rev
1513 #if KMP_HAVE_QUAD
1514 // routines for _Quad type
1515 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1516  1) // __kmpc_atomic_float16_sub_rev
1517 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1518  1) // __kmpc_atomic_float16_div_rev
1519 #if (KMP_ARCH_X86)
1520 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1521  1) // __kmpc_atomic_float16_sub_a16_rev
1522 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1523  1) // __kmpc_atomic_float16_div_a16_rev
1524 #endif // KMP_ARCH_X86
1525 #endif // KMP_HAVE_QUAD
1526 
1527 // routines for complex types
1528 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1529  1) // __kmpc_atomic_cmplx4_sub_rev
1530 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1531  1) // __kmpc_atomic_cmplx4_div_rev
1532 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1533  1) // __kmpc_atomic_cmplx8_sub_rev
1534 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1535  1) // __kmpc_atomic_cmplx8_div_rev
1536 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1537  1) // __kmpc_atomic_cmplx10_sub_rev
1538 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1539  1) // __kmpc_atomic_cmplx10_div_rev
1540 #if KMP_HAVE_QUAD
1541 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1542  1) // __kmpc_atomic_cmplx16_sub_rev
1543 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1544  1) // __kmpc_atomic_cmplx16_div_rev
1545 #if (KMP_ARCH_X86)
1546 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1547  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1548 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1549  1) // __kmpc_atomic_cmplx16_div_a16_rev
1550 #endif // KMP_ARCH_X86
1551 #endif // KMP_HAVE_QUAD
1552 
1553 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1554 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1555 
1556 /* ------------------------------------------------------------------------ */
1557 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1558 /* Note: to reduce the total number of type combinations, it is assumed that */
1559 /* the compiler converts the RHS to the longest floating type, i.e. _Quad, */
1560 /* before calling any of these routines. */
1561 /* The conversion to _Quad is done by the compiler during the calculation, */
1562 /* and the conversion back to TYPE happens before the assignment, like: */
1563 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1564 /* A performance penalty is expected because of software (_Quad) emulation. */
1565 /* ------------------------------------------------------------------------ */
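// Usage sketch (assumption, for illustration only -- loc/x/q are placeholders):
//   float x; _Quad q;
//   #pragma omp atomic
//   x += q;
// is expected to be lowered to __kmpc_atomic_float4_add_fp(&loc, gtid, &x, q), which
// effectively performs *lhs = (float)((_Quad)(*lhs) + rhs) under the scheme above.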
1566 
1567 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1568  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1569  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1570  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1571  KA_TRACE(100, \
1572  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1573  gtid));
1574 
1575 // -------------------------------------------------------------------------
1576 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1577  GOMP_FLAG) \
1578  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1579  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1580  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1581  }
1582 
1583 // -------------------------------------------------------------------------
1584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1585 // -------------------------------------------------------------------------
1586 // X86 or X86_64: no alignment problems ====================================
1587 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1588  LCK_ID, MASK, GOMP_FLAG) \
1589  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1590  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1591  OP_CMPXCHG(TYPE, BITS, OP) \
1592  }
1593 // -------------------------------------------------------------------------
1594 #else
1595 // ------------------------------------------------------------------------
1596 // Code for other architectures that don't handle unaligned accesses.
1597 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1598  LCK_ID, MASK, GOMP_FLAG) \
1599  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1600  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1601  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1602  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1603  } else { \
1604  KMP_CHECK_GTID; \
1605  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1606  } \
1607  }
1608 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1609 
1610 // -------------------------------------------------------------------------
1611 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1612 // -------------------------------------------------------------------------
1613 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1614  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1615  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1616  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1617  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1618  }
1619 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1620  LCK_ID, GOMP_FLAG) \
1621  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1622  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1623  OP_CRITICAL_REV(OP, LCK_ID) \
1624  }
1625 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1626 
1627 // RHS=float8
1628 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1629  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1630 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1631  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1632 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1633  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1634 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1635  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1636 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1637  0) // __kmpc_atomic_fixed4_mul_float8
1638 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1639  0) // __kmpc_atomic_fixed4_div_float8
1640 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1641  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1642 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1643  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1644 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1645  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1646 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1647  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1649  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1651  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1652 
1653 // RHS=float16 (deprecated; to be removed once we are sure the compiler does not
1654 // use these entries)
1655 #if KMP_HAVE_QUAD
1656 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1657  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1658 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1659  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1660 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1661  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1663  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1664 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1665  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1667  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1668 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1669  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1671  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1672 
1673 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1674  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1675 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1676  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1677 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1678  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1680  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1681 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1682  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1684  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1685 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1686  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1688  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1689 
1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1691  0) // __kmpc_atomic_fixed4_add_fp
1692 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1693  0) // __kmpc_atomic_fixed4u_add_fp
1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1695  0) // __kmpc_atomic_fixed4_sub_fp
1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1697  0) // __kmpc_atomic_fixed4u_sub_fp
1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1699  0) // __kmpc_atomic_fixed4_mul_fp
1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1701  0) // __kmpc_atomic_fixed4u_mul_fp
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1703  0) // __kmpc_atomic_fixed4_div_fp
1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1705  0) // __kmpc_atomic_fixed4u_div_fp
1706 
1707 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1708  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1709 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1710  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1712  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1714  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1716  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1718  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1720  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1722  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1723 
1724 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1725  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1726 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1727  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1729  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1731  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1732 
1733 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1734  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1735 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1736  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1738  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1740  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1741 
1742 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1743  1) // __kmpc_atomic_float10_add_fp
1744 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1745  1) // __kmpc_atomic_float10_sub_fp
1746 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1747  1) // __kmpc_atomic_float10_mul_fp
1748 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1749  1) // __kmpc_atomic_float10_div_fp
1750 
1751 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1752 // Reverse operations
1753 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1754  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1755 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1756  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1758  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1760  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1761 
1762 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1763  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1765  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1767  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1769  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1770 
1771 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1772  0) // __kmpc_atomic_fixed4_sub_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1774  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1776  0) // __kmpc_atomic_fixed4_div_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1778  0) // __kmpc_atomic_fixed4u_div_rev_fp
1779 
1780 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1781  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1783  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1785  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1787  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1788 
1789 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1790  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1792  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1793 
1794 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1795  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1797  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1798 
1799 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1800  1) // __kmpc_atomic_float10_sub_rev_fp
1801 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1802  1) // __kmpc_atomic_float10_div_rev_fp
1803 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1804 
1805 #endif // KMP_HAVE_QUAD
1806 
1807 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1808 // ------------------------------------------------------------------------
1809 // X86 or X86_64: no alignment problems ====================================
1810 #if USE_CMPXCHG_FIX
1811 // workaround for C78287 (complex(kind=4) data type)
1812 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1813  LCK_ID, MASK, GOMP_FLAG) \
1814  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1815  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1816  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1817  }
1818 // end of the second part of the workaround for C78287
1819 #else
1820 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1821  LCK_ID, MASK, GOMP_FLAG) \
1822  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1823  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1824  OP_CMPXCHG(TYPE, BITS, OP) \
1825  }
1826 #endif // USE_CMPXCHG_FIX
1827 #else
1828 // ------------------------------------------------------------------------
1829 // Code for other architectures that don't handle unaligned accesses.
1830 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1831  LCK_ID, MASK, GOMP_FLAG) \
1832  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1833  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1834  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1835  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1836  } else { \
1837  KMP_CHECK_GTID; \
1838  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1839  } \
1840  }
1841 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
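// Illustration (sketch, not compiled): for the non-x86 variant above, e.g.
// ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64,
// 8c, 7, KMP_ARCH_X86), the MASK test keeps the lock-free path only for
// naturally aligned targets; assuming ATOMIC_BEGIN_MIX (defined earlier in
// this file) produces the usual void __kmpc_atomic_<type>_<op>_<rtype> entry,
// the generated body is roughly:
//
//   void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 *lhs, kmp_cmplx64 rhs) {
//     if (!((kmp_uintptr_t)lhs & 0x7)) {
//       /* 8-byte aligned: CAS loop on the 64-bit image of *lhs (OP_CMPXCHG) */
//     } else {
//       /* unaligned: serialize on the 8c lock (OP_CRITICAL) */
//     }
//   }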
1842 
1843 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1844  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1845 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1846  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1848  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1850  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1851 
1852 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1853 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1854 
1855 // ------------------------------------------------------------------------
1856 // Atomic READ routines
1857 
1858 // ------------------------------------------------------------------------
1859 // Beginning of a definition (provides name, parameters, debug trace)
1860 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1861 // fixed)
1862 // OP_ID - operation identifier (add, sub, mul, ...)
1863 // TYPE - operands' type
1864 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1865  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1866  TYPE *loc) { \
1867  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1868  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1869 
1870 // ------------------------------------------------------------------------
1871 // Operation on *loc using "compare_and_store_ret" routine
1872 // TYPE - operands' type
1873 // BITS - size in bits, used to distinguish low level calls
1874 // OP - operator
1875 // Note: temp_val introduced in order to force the compiler to read
1876 // *loc only once (w/o it the compiler reads *loc twice)
1877 // TODO: check if it is still necessary
1878 // Return old value regardless of the result of the "compare & swap" operation
1879 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1880  { \
1881  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1882  union f_i_union { \
1883  TYPE f_val; \
1884  kmp_int##BITS i_val; \
1885  }; \
1886  union f_i_union old_value; \
1887  temp_val = *loc; \
1888  old_value.f_val = temp_val; \
1889  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1890  (kmp_int##BITS *)loc, \
1891  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1892  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1893  new_value = old_value.f_val; \
1894  return new_value; \
1895  }
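// Illustration (sketch, not compiled): for a 32-bit TYPE the block above
// performs the read by "storing back what is already there"; assuming
// KMP_COMPARE_AND_STORE_RET32 always returns the previous 32-bit contents of
// its target, the read collapses to one atomic compare-and-swap of *loc with
// itself:
//
//   union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//   old_value.f_val = *loc;                         // initial snapshot
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET32(  // swap *loc with itself
//       (kmp_int32 *)loc, old_value.i_val, old_value.i_val);
//   return old_value.f_val;                         // atomically read value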
1896 
1897 // -------------------------------------------------------------------------
1898 // Operation on *lhs, rhs bound by critical section
1899 // OP - operator (it's supposed to contain an assignment)
1900 // LCK_ID - lock identifier
1901 // Note: don't check gtid as it should always be valid
1902 // 1, 2-byte - expect valid parameter, other - check before this macro
1903 #define OP_CRITICAL_READ(OP, LCK_ID) \
1904  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1905  \
1906  new_value = (*loc); \
1907  \
1908  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1909 
1910 // -------------------------------------------------------------------------
1911 #ifdef KMP_GOMP_COMPAT
1912 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1913  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1914  KMP_CHECK_GTID; \
1915  OP_CRITICAL_READ(OP, 0); \
1916  return new_value; \
1917  }
1918 #else
1919 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1920 #endif /* KMP_GOMP_COMPAT */
1921 
1922 // -------------------------------------------------------------------------
1923 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1924  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1925  TYPE new_value; \
1926  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1927  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1928  return new_value; \
1929  }
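// Illustration (sketch, not compiled): ATOMIC_FIXED_READ reads a fixed-size
// integer by atomically adding zero, so ATOMIC_FIXED_READ(fixed4, rd,
// kmp_int32, 32, +, 0) should expand to roughly the following (assuming
// KMP_TEST_THEN_ADD32 is the fetch-and-add primitive that returns the
// previous value):
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     kmp_int32 new_value = KMP_TEST_THEN_ADD32(loc, +0); // add 0 == read
//     return new_value;
//   }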
1930 // -------------------------------------------------------------------------
1931 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1932  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1933  TYPE new_value; \
1934  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1935  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1936  }
1937 // ------------------------------------------------------------------------
1938 // Routines for Extended types: long double, _Quad, complex flavours (use
1939 // critical section)
1940 // TYPE_ID, OP_ID, TYPE - detailed above
1941 // OP - operator
1942 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1943 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1944  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1945  TYPE new_value; \
1946  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1947  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1948  return new_value; \
1949  }
1950 
1951 // ------------------------------------------------------------------------
1952 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1953 // value doesn't work.
1954 // Let's return the read value through the additional parameter.
1955 #if (KMP_OS_WINDOWS)
1956 
1957 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1958  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1959  \
1960  (*out) = (*loc); \
1961  \
1962  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1963 // ------------------------------------------------------------------------
1964 #ifdef KMP_GOMP_COMPAT
1965 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1966  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1967  KMP_CHECK_GTID; \
1968  OP_CRITICAL_READ_WRK(OP, 0); \
1969  }
1970 #else
1971 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1972 #endif /* KMP_GOMP_COMPAT */
1973 // ------------------------------------------------------------------------
1974 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1975  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1976  TYPE *loc) { \
1977  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1978  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1979 
1980 // ------------------------------------------------------------------------
1981 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1982  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1983  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1984  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1985  }
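// Illustration (sketch, not compiled): on Windows the cmplx4 read is therefore
// generated with the result handed back through the extra "out" parameter
// instead of the return value; assuming ATOMIC_LOCK8c names the
// __kmp_atomic_lock_8c global defined earlier in this file,
// ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 1) yields roughly:
//
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc) {
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//     (*out) = (*loc);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//   }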
1986 
1987 #endif // KMP_OS_WINDOWS
1988 
1989 // ------------------------------------------------------------------------
1990 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1991 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1992 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1993  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1994 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1995  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
1996 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
1997  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
1998 
1999 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2000 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2001  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2002 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2003  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2004 
2005 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2006  1) // __kmpc_atomic_float10_rd
2007 #if KMP_HAVE_QUAD
2008 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2009  1) // __kmpc_atomic_float16_rd
2010 #endif // KMP_HAVE_QUAD
2011 
2012 // Fix for CQ220361 on Windows* OS
2013 #if (KMP_OS_WINDOWS)
2014 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2015  1) // __kmpc_atomic_cmplx4_rd
2016 #else
2017 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2018  1) // __kmpc_atomic_cmplx4_rd
2019 #endif // (KMP_OS_WINDOWS)
2020 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2021  1) // __kmpc_atomic_cmplx8_rd
2022 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2023  1) // __kmpc_atomic_cmplx10_rd
2024 #if KMP_HAVE_QUAD
2025 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2026  1) // __kmpc_atomic_cmplx16_rd
2027 #if (KMP_ARCH_X86)
2028 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2029  1) // __kmpc_atomic_float16_a16_rd
2030 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2031  1) // __kmpc_atomic_cmplx16_a16_rd
2032 #endif // (KMP_ARCH_X86)
2033 #endif // KMP_HAVE_QUAD
2034 
2035 // ------------------------------------------------------------------------
2036 // Atomic WRITE routines
2037 
2038 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2039  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2040  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2041  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2042  }
2043 // ------------------------------------------------------------------------
2044 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2045  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2046  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2047  KMP_XCHG_REAL##BITS(lhs, rhs); \
2048  }
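// Illustration (sketch, not compiled): where an atomic exchange of the right
// width exists, a write is just that exchange; assuming ATOMIC_BEGIN (defined
// earlier in this file) produces the usual void __kmpc_atomic_<type>_<op>
// entry and KMP_XCHG_FIXED32 is the 32-bit exchange primitive,
// ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86) is roughly:
//
//   void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                kmp_int32 rhs) {
//     KMP_XCHG_FIXED32(lhs, rhs); // atomic store, old value discarded
//   }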
2049 
2050 // ------------------------------------------------------------------------
2051 // Operation on *lhs, rhs using "compare_and_store" routine
2052 // TYPE - operands' type
2053 // BITS - size in bits, used to distinguish low level calls
2054 // OP - operator
2055 // Note: temp_val introduced in order to force the compiler to read
2056 // *lhs only once (w/o it the compiler reads *lhs twice)
2057 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2058  { \
2059  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2060  TYPE old_value, new_value; \
2061  temp_val = *lhs; \
2062  old_value = temp_val; \
2063  new_value = rhs; \
2064  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2065  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2066  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2067  KMP_CPU_PAUSE(); \
2068  \
2069  temp_val = *lhs; \
2070  old_value = temp_val; \
2071  new_value = rhs; \
2072  } \
2073  }
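// Illustration (sketch, not compiled): the loop above turns a plain store into
// a compare-and-swap that retries until the snapshot it read is still current;
// this is how the 8-byte writes are generated on 32-bit x86, where no 64-bit
// exchange is used.  Sketch for kmp_real64 (assuming
// KMP_COMPARE_AND_STORE_ACQ64 returns non-zero on success):
//
//   kmp_real64 old_value = *lhs, new_value = rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                       *(kmp_int64 *)&old_value,
//                                       *(kmp_int64 *)&new_value)) {
//     KMP_CPU_PAUSE(); // back off, re-read, retry
//     old_value = *lhs;
//     new_value = rhs;
//   }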
2074 
2075 // -------------------------------------------------------------------------
2076 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2077  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2078  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2079  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2080  }
2081 
2082 // ------------------------------------------------------------------------
2083 // Routines for Extended types: long double, _Quad, complex flavours (use
2084 // critical section)
2085 // TYPE_ID, OP_ID, TYPE - detailed above
2086 // OP - operator
2087 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2088 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2089  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2090  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2091  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2092  }
2093 // -------------------------------------------------------------------------
2094 
2095 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2096  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2097 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2098  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2099 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2100  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2101 #if (KMP_ARCH_X86)
2102 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2103  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2104 #else
2105 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2106  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2107 #endif // (KMP_ARCH_X86)
2108 
2109 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2110  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2111 #if (KMP_ARCH_X86)
2112 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2113  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2114 #else
2115 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2116  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2117 #endif // (KMP_ARCH_X86)
2118 
2119 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2120  1) // __kmpc_atomic_float10_wr
2121 #if KMP_HAVE_QUAD
2122 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2123  1) // __kmpc_atomic_float16_wr
2124 #endif // KMP_HAVE_QUAD
2125 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2126 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2127  1) // __kmpc_atomic_cmplx8_wr
2128 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2129  1) // __kmpc_atomic_cmplx10_wr
2130 #if KMP_HAVE_QUAD
2131 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2132  1) // __kmpc_atomic_cmplx16_wr
2133 #if (KMP_ARCH_X86)
2134 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2135  1) // __kmpc_atomic_float16_a16_wr
2136 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2137  1) // __kmpc_atomic_cmplx16_a16_wr
2138 #endif // (KMP_ARCH_X86)
2139 #endif // KMP_HAVE_QUAD
2140 
2141 // ------------------------------------------------------------------------
2142 // Atomic CAPTURE routines
2143 
2144 // Beginning of a definition (provides name, parameters, debug trace)
2145 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2146 // fixed)
2147 // OP_ID - operation identifier (add, sub, mul, ...)
2148 // TYPE - operands' type
2149 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2150  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2151  TYPE *lhs, TYPE rhs, int flag) { \
2152  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2153  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2154 
2155 // -------------------------------------------------------------------------
2156 // Operation on *lhs, rhs bound by critical section
2157 // OP - operator (it's supposed to contain an assignment)
2158 // LCK_ID - lock identifier
2159 // Note: don't check gtid as it should always be valid
2160 // 1, 2-byte - expect valid parameter, other - check before this macro
2161 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2162  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2163  \
2164  if (flag) { \
2165  (*lhs) OP rhs; \
2166  new_value = (*lhs); \
2167  } else { \
2168  new_value = (*lhs); \
2169  (*lhs) OP rhs; \
2170  } \
2171  \
2172  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2173  return new_value;
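// Illustration (sketch, not compiled): "flag" selects which of the two OpenMP
// capture forms is being served -- capture the new value or the old one.  For
// a kmp_int32 x, the compiler is expected to emit calls along the lines of
// (using __kmpc_atomic_fixed4_add_cpt, generated below):
//
//   v = x += expr;          ->  v = __kmpc_atomic_fixed4_add_cpt(id, gtid,
//                                                                &x, expr, 1);
//   { v = x; x += expr; }   ->  v = __kmpc_atomic_fixed4_add_cpt(id, gtid,
//                                                                &x, expr, 0);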
2174 
2175 // ------------------------------------------------------------------------
2176 #ifdef KMP_GOMP_COMPAT
2177 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2178  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2179  KMP_CHECK_GTID; \
2180  OP_CRITICAL_CPT(OP## =, 0); \
2181  }
2182 #else
2183 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2184 #endif /* KMP_GOMP_COMPAT */
2185 
2186 // ------------------------------------------------------------------------
2187 // Operation on *lhs, rhs using "compare_and_store" routine
2188 // TYPE - operands' type
2189 // BITS - size in bits, used to distinguish low level calls
2190 // OP - operator
2191 // Note: temp_val introduced in order to force the compiler to read
2192 // *lhs only once (w/o it the compiler reads *lhs twice)
2193 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2194  { \
2195  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2196  TYPE old_value, new_value; \
2197  temp_val = *lhs; \
2198  old_value = temp_val; \
2199  new_value = old_value OP rhs; \
2200  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2201  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2202  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2203  KMP_CPU_PAUSE(); \
2204  \
2205  temp_val = *lhs; \
2206  old_value = temp_val; \
2207  new_value = old_value OP rhs; \
2208  } \
2209  if (flag) { \
2210  return new_value; \
2211  } else \
2212  return old_value; \
2213  }
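// Illustration (sketch, not compiled): the lock-free capture is a classic CAS
// loop -- compute old OP rhs from a snapshot, try to install it, and retry
// (with a pause) if another thread changed *lhs first.  For kmp_int32 and
// OP '+' the block above behaves like:
//
//   kmp_int32 old_value, new_value;
//   do {
//     old_value = *lhs;             // snapshot
//     new_value = old_value + rhs;  // desired result
//   } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value,
//                                         new_value));
//   return flag ? new_value : old_value;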
2214 
2215 // -------------------------------------------------------------------------
2216 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2217  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2218  TYPE new_value; \
2219  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2220  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2221  }
2222 
2223 // -------------------------------------------------------------------------
2224 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2225  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2226  TYPE old_value, new_value; \
2227  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2228  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2229  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2230  if (flag) { \
2231  return old_value OP rhs; \
2232  } else \
2233  return old_value; \
2234  }
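// Illustration (sketch, not compiled): for add/sub on fixed-size integers the
// capture needs no CAS loop -- one fetch-and-add both updates x and returns
// its old value, and "OP rhs" turns subtraction into addition of the negated
// operand.  ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0)
// therefore reduces to roughly:
//
//   kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // atomic x -= rhs
//   return flag ? old_value - rhs : old_value;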
2235 // -------------------------------------------------------------------------
2236 
2237 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2238  0) // __kmpc_atomic_fixed4_add_cpt
2239 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2240  0) // __kmpc_atomic_fixed4_sub_cpt
2241 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2242  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2243 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2244  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2245 
2246 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2247  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2248 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2249  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2250 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2251  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2252 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2253  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2254 
2255 // ------------------------------------------------------------------------
2256 // Entries definition for integer operands
2257 // TYPE_ID - operands type and size (fixed4, float4)
2258 // OP_ID - operation identifier (add, sub, mul, ...)
2259 // TYPE - operand type
2260 // BITS - size in bits, used to distinguish low level calls
2261 // OP - operator (used in critical section)
2262 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2263 // ------------------------------------------------------------------------
2264 // Routines for ATOMIC integer operands, other operators
2265 // ------------------------------------------------------------------------
2266 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2267 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2268  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2269 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2270  0) // __kmpc_atomic_fixed1_andb_cpt
2271 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2272  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2273 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2274  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2275 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2276  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2277 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2278  0) // __kmpc_atomic_fixed1_orb_cpt
2279 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2280  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2281 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2282  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2283 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2284  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2285 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2286  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2287 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2288  0) // __kmpc_atomic_fixed1_xor_cpt
2289 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2290  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2291 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2292  0) // __kmpc_atomic_fixed2_andb_cpt
2293 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2294  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2295 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2296  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2297 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2298  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2299 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2300  0) // __kmpc_atomic_fixed2_orb_cpt
2301 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2302  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2304  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2306  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2308  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2310  0) // __kmpc_atomic_fixed2_xor_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2312  0) // __kmpc_atomic_fixed4_andb_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2314  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2316  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2318  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2320  0) // __kmpc_atomic_fixed4_orb_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2322  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2324  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2326  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2328  0) // __kmpc_atomic_fixed4_xor_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2330  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2332  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2334  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2336  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2338  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2340  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2342  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2344  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2346  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2347 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2348  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2349 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2350  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2351 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2352  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2353 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2354  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2355 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2356 
2357 // CAPTURE routines for mixed types RHS=float16
2358 #if KMP_HAVE_QUAD
2359 
2360 // Beginning of a definition (provides name, parameters, debug trace)
2361 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2362 // fixed)
2363 // OP_ID - operation identifier (add, sub, mul, ...)
2364 // TYPE - operands' type
2365 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2366  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2367  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2368  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2369  KA_TRACE(100, \
2370  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2371  gtid));
2372 
2373 // -------------------------------------------------------------------------
2374 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2375  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2376  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2377  TYPE new_value; \
2378  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2379  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2380  }
2381 
2382 // -------------------------------------------------------------------------
2383 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2384  LCK_ID, GOMP_FLAG) \
2385  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2386  TYPE new_value; \
2387  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2388  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2389  }
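// Illustration (sketch, not compiled): the mixed-type capture entries keep the
// LHS type for the return value and take the RHS as _Quad, e.g.
// ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
// KMP_ARCH_X86) declares (per ATOMIC_BEGIN_CPT_MIX above):
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
//                                        _Quad rhs, int flag);
//
// The body is the same OP_CMPXCHG_CPT loop; the RHS participates in the
// arithmetic as _Quad and the result is converted back to the 1-byte LHS type
// on assignment.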
2390 
2391 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2392  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2393 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2394  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2396  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2398  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2400  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2402  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2404  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2406  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2407 
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2415  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2417  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2419  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2421  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2423  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2424 
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2426  0) // __kmpc_atomic_fixed4_add_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2428  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2430  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2432  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2434  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2436  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2438  0) // __kmpc_atomic_fixed4_div_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2440  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2441 
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2443  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2445  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2447  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2449  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2451  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2453  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2455  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2457  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2458 
2459 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2460  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2462  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2464  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2466  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2467 
2468 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2469  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2471  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2473  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2475  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2476 
2477 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2478  1) // __kmpc_atomic_float10_add_cpt_fp
2479 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2480  1) // __kmpc_atomic_float10_sub_cpt_fp
2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2482  1) // __kmpc_atomic_float10_mul_cpt_fp
2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2484  1) // __kmpc_atomic_float10_div_cpt_fp
2485 
2486 #endif // KMP_HAVE_QUAD
2487 
2488 // ------------------------------------------------------------------------
2489 // Routines for C/C++ Reduction operators && and ||
2490 
2491 // -------------------------------------------------------------------------
2492 // Operation on *lhs, rhs bound by critical section
2493 // OP - operator (it's supposed to contain an assignment)
2494 // LCK_ID - lock identifier
2495 // Note: don't check gtid as it should always be valid
2496 // 1, 2-byte - expect valid parameter, other - check before this macro
2497 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2498  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2499  \
2500  if (flag) { \
2501  new_value OP rhs; \
2502  } else \
2503  new_value = (*lhs); \
2504  \
2505  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2506 
2507 // ------------------------------------------------------------------------
2508 #ifdef KMP_GOMP_COMPAT
2509 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2510  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2511  KMP_CHECK_GTID; \
2512  OP_CRITICAL_L_CPT(OP, 0); \
2513  return new_value; \
2514  }
2515 #else
2516 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2517 #endif /* KMP_GOMP_COMPAT */
2518 
2519 // ------------------------------------------------------------------------
2520 // Need separate macros for &&, || because there is no combined assignment
2521 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2522  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2523  TYPE new_value; \
2524  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2525  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2526  }
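// Illustration (sketch, not compiled): && and || have no "op=" form, so the
// operator is spliced in as a full expression.  For the fixed4 &&-capture,
// OP_GOMP_CRITICAL_L_CPT(= *lhs OP, ...) makes the "new_value OP rhs" line of
// OP_CRITICAL_L_CPT read
//
//   new_value = *lhs && rhs;
//
// while the lock-free path reuses OP_CMPXCHG_CPT with OP '&&', i.e. the CAS
// loop installs (old_value && rhs) and returns old or new per "flag".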
2527 
2528 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2529  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2530 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2531  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2532 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2533  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2534 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2535  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2536 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2537  0) // __kmpc_atomic_fixed4_andl_cpt
2538 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2539  0) // __kmpc_atomic_fixed4_orl_cpt
2540 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2541  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2542 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2543  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2544 
2545 // -------------------------------------------------------------------------
2546 // Routines for Fortran operators that have no C counterpart:
2547 // MAX, MIN, .EQV., .NEQV.
2548 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2549 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2550 
2551 // -------------------------------------------------------------------------
2552 // MIN and MAX need separate macros
2553 // OP - operator, used to check whether any action is still needed
2554 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2555  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2556  \
2557  if (*lhs OP rhs) { /* still need actions? */ \
2558  old_value = *lhs; \
2559  *lhs = rhs; \
2560  if (flag) \
2561  new_value = rhs; \
2562  else \
2563  new_value = old_value; \
2564  } else { \
2565  new_value = *lhs; \
2566  } \
2567  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2568  return new_value;
2569 
2570 // -------------------------------------------------------------------------
2571 #ifdef KMP_GOMP_COMPAT
2572 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2573  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2574  KMP_CHECK_GTID; \
2575  MIN_MAX_CRITSECT_CPT(OP, 0); \
2576  }
2577 #else
2578 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2579 #endif /* KMP_GOMP_COMPAT */
2580 
2581 // -------------------------------------------------------------------------
2582 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2583  { \
2584  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2585  /*TYPE old_value; */ \
2586  temp_val = *lhs; \
2587  old_value = temp_val; \
2588  while (old_value OP rhs && /* still need actions? */ \
2589  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2590  (kmp_int##BITS *)lhs, \
2591  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2592  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2593  KMP_CPU_PAUSE(); \
2594  temp_val = *lhs; \
2595  old_value = temp_val; \
2596  } \
2597  if (flag) \
2598  return rhs; \
2599  else \
2600  return old_value; \
2601  }
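// Illustration (sketch, not compiled): the min/max capture only attempts the
// exchange while it is still needed; for a 32-bit "max" (OP is '<') the block
// above behaves like:
//
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs &&   // rhs still larger, so exchange still needed
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;         // lost the race - re-read and re-check
//   }
//   return flag ? rhs : old_value; // per the macro: rhs is taken as the
//                                  // captured "new" value when flag is set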
2602 
2603 // -------------------------------------------------------------------------
2604 // Operands with no matching fixed-size compare-and-swap - use critical section
2605 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2606  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2607  TYPE new_value, old_value; \
2608  if (*lhs OP rhs) { /* need actions? */ \
2609  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2610  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2611  } \
2612  return *lhs; \
2613  }
2614 
2615 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2616  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2617  TYPE new_value, old_value; \
2618  if (*lhs OP rhs) { \
2619  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2620  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2621  } \
2622  return *lhs; \
2623  }
2624 
2625 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2626  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2627 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2628  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2629 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2630  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2631 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2632  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2633 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2634  0) // __kmpc_atomic_fixed4_max_cpt
2635 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2636  0) // __kmpc_atomic_fixed4_min_cpt
2637 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2638  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2639 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2640  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2641 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2642  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2643 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2644  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2645 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2646  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2647 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2648  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2649 #if KMP_HAVE_QUAD
2650 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2651  1) // __kmpc_atomic_float16_max_cpt
2652 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2653  1) // __kmpc_atomic_float16_min_cpt
2654 #if (KMP_ARCH_X86)
2655 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2656  1) // __kmpc_atomic_float16_max_a16_cpt
2657 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2658  1) // __kmpc_atomic_float16_min_a16_cpt
2659 #endif // (KMP_ARCH_X86)
2660 #endif // KMP_HAVE_QUAD
2661 
2662 // ------------------------------------------------------------------------
2663 #ifdef KMP_GOMP_COMPAT
2664 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2665  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2666  KMP_CHECK_GTID; \
2667  OP_CRITICAL_CPT(OP, 0); \
2668  }
2669 #else
2670 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2671 #endif /* KMP_GOMP_COMPAT */
2672 // ------------------------------------------------------------------------
2673 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2674  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2675  TYPE new_value; \
2676  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2677  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2678  }
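// Illustration (sketch, not compiled): .EQV. is implemented as XOR with the
// complement -- the OP token "^~" splices into OP_CMPXCHG_CPT as
//
//   new_value = old_value ^ ~rhs;   // bitwise equivalence of old_value, rhs
//
// and the GOMP critical path is handed "^= ~" so the locked form becomes
// (*lhs) ^= ~rhs.  .NEQV. below needs no such trick: it is plain '^'.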
2679 
2680 // ------------------------------------------------------------------------
2681 
2682 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2683  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2684 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2685  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2686 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2687  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2688 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2689  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2690 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2691  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2692 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2693  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2694 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2695  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2696 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2697  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2698 
2699 // ------------------------------------------------------------------------
2700 // Routines for Extended types: long double, _Quad, complex flavours (use
2701 // critical section)
2702 // TYPE_ID, OP_ID, TYPE - detailed above
2703 // OP - operator
2704 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2705 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2706  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2707  TYPE new_value; \
2708  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2709  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2710  }
2711 
2712 // ------------------------------------------------------------------------
2713 // Workaround for cmplx4. Regular routines with return value don't work
2714 // on Win_32e. Let's return captured values through the additional parameter.
2715 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2716  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2717  \
2718  if (flag) { \
2719  (*lhs) OP rhs; \
2720  (*out) = (*lhs); \
2721  } else { \
2722  (*out) = (*lhs); \
2723  (*lhs) OP rhs; \
2724  } \
2725  \
2726  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2727  return;
2728 // ------------------------------------------------------------------------
2729 
2730 #ifdef KMP_GOMP_COMPAT
2731 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2732  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2733  KMP_CHECK_GTID; \
2734  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2735  }
2736 #else
2737 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2738 #endif /* KMP_GOMP_COMPAT */
2739 // ------------------------------------------------------------------------
2740 
2741 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2742  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2743  TYPE rhs, TYPE *out, int flag) { \
2744  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2745  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2746 // ------------------------------------------------------------------------
2747 
2748 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2749  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2750  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2751  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2752  }
2753 // The end of workaround for cmplx4
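// Illustration (sketch, not compiled): the cmplx4 capture entry points are
// therefore void and hand the captured value back through "out"; assuming
// ATOMIC_LOCK8c names the __kmp_atomic_lock_8c global defined earlier in this
// file, ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1)
// yields roughly:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag) {
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//     if (flag) { (*lhs) += rhs; (*out) = (*lhs); } // capture new value
//     else      { (*out) = (*lhs); (*lhs) += rhs; } // capture old value
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//   }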
2754 
2755 /* ------------------------------------------------------------------------- */
2756 // routines for long double type
2757 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2758  1) // __kmpc_atomic_float10_add_cpt
2759 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2760  1) // __kmpc_atomic_float10_sub_cpt
2761 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2762  1) // __kmpc_atomic_float10_mul_cpt
2763 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2764  1) // __kmpc_atomic_float10_div_cpt
2765 #if KMP_HAVE_QUAD
2766 // routines for _Quad type
2767 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2768  1) // __kmpc_atomic_float16_add_cpt
2769 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2770  1) // __kmpc_atomic_float16_sub_cpt
2771 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2772  1) // __kmpc_atomic_float16_mul_cpt
2773 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2774  1) // __kmpc_atomic_float16_div_cpt
2775 #if (KMP_ARCH_X86)
2776 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2777  1) // __kmpc_atomic_float16_add_a16_cpt
2778 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2779  1) // __kmpc_atomic_float16_sub_a16_cpt
2780 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2781  1) // __kmpc_atomic_float16_mul_a16_cpt
2782 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2783  1) // __kmpc_atomic_float16_div_a16_cpt
2784 #endif // (KMP_ARCH_X86)
2785 #endif // KMP_HAVE_QUAD
2786 
2787 // routines for complex types
2788 
2789 // cmplx4 routines to return void
2790 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2791  1) // __kmpc_atomic_cmplx4_add_cpt
2792 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2793  1) // __kmpc_atomic_cmplx4_sub_cpt
2794 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2795  1) // __kmpc_atomic_cmplx4_mul_cpt
2796 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2797  1) // __kmpc_atomic_cmplx4_div_cpt
2798 
2799 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2800  1) // __kmpc_atomic_cmplx8_add_cpt
2801 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2802  1) // __kmpc_atomic_cmplx8_sub_cpt
2803 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2804  1) // __kmpc_atomic_cmplx8_mul_cpt
2805 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2806  1) // __kmpc_atomic_cmplx8_div_cpt
2807 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2808  1) // __kmpc_atomic_cmplx10_add_cpt
2809 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2810  1) // __kmpc_atomic_cmplx10_sub_cpt
2811 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2812  1) // __kmpc_atomic_cmplx10_mul_cpt
2813 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2814  1) // __kmpc_atomic_cmplx10_div_cpt
2815 #if KMP_HAVE_QUAD
2816 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2817  1) // __kmpc_atomic_cmplx16_add_cpt
2818 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2819  1) // __kmpc_atomic_cmplx16_sub_cpt
2820 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2821  1) // __kmpc_atomic_cmplx16_mul_cpt
2822 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2823  1) // __kmpc_atomic_cmplx16_div_cpt
2824 #if (KMP_ARCH_X86)
2825 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2826  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2827 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2828  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2829 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2830  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2831 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2832  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2833 #endif // (KMP_ARCH_X86)
2834 #endif // KMP_HAVE_QUAD
2835 
2836 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2837 // binop x; v = x; } for non-commutative operations.
2838 // Supported only on IA-32 architecture and Intel(R) 64
2839 
2840 // -------------------------------------------------------------------------
2841 // Operation on *lhs, rhs bound by critical section
2842 // OP - operator (it's supposed to contain an assignment)
2843 // LCK_ID - lock identifier
2844 // Note: don't check gtid as it should always be valid
2845 // 1, 2-byte - expect valid parameter, other - check before this macro
2846 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2847  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2848  \
2849  if (flag) { \
2850  /*temp_val = (*lhs);*/ \
2851  (*lhs) = (rhs)OP(*lhs); \
2852  new_value = (*lhs); \
2853  } else { \
2854  new_value = (*lhs); \
2855  (*lhs) = (rhs)OP(*lhs); \
2856  } \
2857  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2858  return new_value;
2859 
2860 // ------------------------------------------------------------------------
2861 #ifdef KMP_GOMP_COMPAT
2862 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2863  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2864  KMP_CHECK_GTID; \
2865  OP_CRITICAL_CPT_REV(OP, 0); \
2866  }
2867 #else
2868 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2869 #endif /* KMP_GOMP_COMPAT */
2870 
2871 // ------------------------------------------------------------------------
2872 // Operation on *lhs, rhs using "compare_and_store" routine
2873 // TYPE - operands' type
2874 // BITS - size in bits, used to distinguish low level calls
2875 // OP - operator
2876 // Note: temp_val introduced in order to force the compiler to read
2877 // *lhs only once (w/o it the compiler reads *lhs twice)
2878 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2879  { \
2880  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2881  TYPE old_value, new_value; \
2882  temp_val = *lhs; \
2883  old_value = temp_val; \
2884  new_value = rhs OP old_value; \
2885  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2886  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2887  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2888  KMP_CPU_PAUSE(); \
2889  \
2890  temp_val = *lhs; \
2891  old_value = temp_val; \
2892  new_value = rhs OP old_value; \
2893  } \
2894  if (flag) { \
2895  return new_value; \
2896  } else \
2897  return old_value; \
2898  }
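// Illustration (sketch, not compiled): the only difference from the forward
// capture loop is the operand order -- the new value is computed as
// (rhs OP old) rather than (old OP rhs).  For fixed4 sub_cpt_rev the CAS loop
// therefore installs
//
//   new_value = rhs - old_value;   // x = expr - x
//
// and, as before, "flag" chooses whether the caller receives new_value or
// old_value.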
2899 
2900 // -------------------------------------------------------------------------
2901 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2902  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2903  TYPE new_value; \
2904  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2905  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2906  }
2907 
2908 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2909  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2910 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2911  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2912 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2913  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2915  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2917  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2919  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2921  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2923  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2925  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2927  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2929  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2931  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2933  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2935  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2937  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2939  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2941  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2943  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2945  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2947  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2957  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2959  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2961  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2963  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2964 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2965 
2966 // ------------------------------------------------------------------------
2967 // Routines for Extended types: long double, _Quad, complex flavours (use
2968 // critical section)
2969 // TYPE_ID, OP_ID, TYPE - detailed above
2970 // OP - operator
2971 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2972 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2973  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2974  TYPE new_value; \
2975  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2976  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2977  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2978  }
2979 
2980 /* ------------------------------------------------------------------------- */
2981 // routines for long double type
2982 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2983  1) // __kmpc_atomic_float10_sub_cpt_rev
2984 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2985  1) // __kmpc_atomic_float10_div_cpt_rev
2986 #if KMP_HAVE_QUAD
2987 // routines for _Quad type
2988 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2989  1) // __kmpc_atomic_float16_sub_cpt_rev
2990 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2991  1) // __kmpc_atomic_float16_div_cpt_rev
2992 #if (KMP_ARCH_X86)
2993 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2994  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
2995 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
2996  1) // __kmpc_atomic_float16_div_a16_cpt_rev
2997 #endif // (KMP_ARCH_X86)
2998 #endif // KMP_HAVE_QUAD
2999 
3000 // routines for complex types
3001 
3002 // ------------------------------------------------------------------------
3003 // Workaround for cmplx4. Regular routines with a return value don't work
3004 // on Win_32e. Return the captured value through an additional parameter instead.
3005 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3006  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3007  \
3008  if (flag) { \
3009  (*lhs) = (rhs)OP(*lhs); \
3010  (*out) = (*lhs); \
3011  } else { \
3012  (*out) = (*lhs); \
3013  (*lhs) = (rhs)OP(*lhs); \
3014  } \
3015  \
3016  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3017  return;
3018 // ------------------------------------------------------------------------
3019 
3020 #ifdef KMP_GOMP_COMPAT
3021 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3022  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3023  KMP_CHECK_GTID; \
3024  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3025  }
3026 #else
3027 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3028 #endif /* KMP_GOMP_COMPAT */
3029 // ------------------------------------------------------------------------
3030 
3031 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3032  GOMP_FLAG) \
3033  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3034  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3035  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3036  }
3037 // The end of workaround for cmplx4
3038 
3039 // !!! TODO: check if we need to return void for cmplx4 routines
3040 // cmplx4 routines to return void
3041 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3042  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3043 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3044  1) // __kmpc_atomic_cmplx4_div_cpt_rev
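// Assuming ATOMIC_BEGIN_WRK mirrors ATOMIC_BEGIN_SWP_WRK further below (void
// return plus an extra output parameter), the two cmplx4 routines above have
// signatures along the lines of
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
//
// i.e. the captured value is written to *out rather than returned.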
3045 
3046 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3047  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3048 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3049  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3050 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3051  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3052 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3053  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3054 #if KMP_HAVE_QUAD
3055 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3056  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3058  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3059 #if (KMP_ARCH_X86)
3060 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3061  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3063  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3064 #endif // (KMP_ARCH_X86)
3065 #endif // KMP_HAVE_QUAD
3066 
3067 // Capture reverse for mixed type: RHS=float16
3068 #if KMP_HAVE_QUAD
3069 
3070 // Beginning of a definition (provides name, parameters, debug trace)
3071 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3072 // fixed)
3073 // OP_ID - operation identifier (add, sub, mul, ...)
3074 // TYPE - operands' type
3075 // -------------------------------------------------------------------------
3076 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3077  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3078  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3079  TYPE new_value; \
3080  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
3081  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3082  }
3083 
3084 // -------------------------------------------------------------------------
3085 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3086  LCK_ID, GOMP_FLAG) \
3087  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3088  TYPE new_value; \
3089  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
3090  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
3091  }
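// For the mixed-type "_fp" entry points generated below, ATOMIC_BEGIN_CPT_MIX
// is assumed to emit a signature whose right-hand side has the _Quad type,
// roughly
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
//
// so the update x = rhs - x is evaluated with the _Quad operand and the result
// is converted back to the (narrower) LHS type before being stored and
// captured.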
3092 
3093 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3094  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3095 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3096  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3097 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3098  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3099 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3100  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3101 
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3103  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3105  1,
3106  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3107 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3108  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3110  1,
3111  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3112 
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3114  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3115 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3116  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3117 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3118  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3120  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3121 
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3123  7,
3124  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3125 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3126  8i, 7,
3127  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3129  7,
3130  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3132  8i, 7,
3133  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3134 
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3136  4r, 3,
3137  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3138 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3139  4r, 3,
3140  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3141 
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3143  8r, 7,
3144  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3145 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3146  8r, 7,
3147  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3148 
3149 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3150  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3151 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3152  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3153 
3154 #endif // KMP_HAVE_QUAD
3155 
3156 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
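// For a 4-byte signed integer x, for example, the compiler may lower
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// to a call such as v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
// where loc is an ident_t source-location descriptor and gtid the global
// thread id (the exact call sequence is compiler-dependent).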
3157 
3158 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3159  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3160  TYPE rhs) { \
3161  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3162  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3163 
3164 #define CRITICAL_SWP(LCK_ID) \
3165  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3166  \
3167  old_value = (*lhs); \
3168  (*lhs) = rhs; \
3169  \
3170  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3171  return old_value;
3172 
3173 // ------------------------------------------------------------------------
3174 #ifdef KMP_GOMP_COMPAT
3175 #define GOMP_CRITICAL_SWP(FLAG) \
3176  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3177  KMP_CHECK_GTID; \
3178  CRITICAL_SWP(0); \
3179  }
3180 #else
3181 #define GOMP_CRITICAL_SWP(FLAG)
3182 #endif /* KMP_GOMP_COMPAT */
3183 
3184 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3185  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3186  TYPE old_value; \
3187  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3188  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3189  return old_value; \
3190  }
3191 // ------------------------------------------------------------------------
3192 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3193  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3194  TYPE old_value; \
3195  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3196  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3197  return old_value; \
3198  }
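// As a rough expansion (debug trace and GOMP-compatibility path omitted),
// ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) used below produces
// approximately:
//
//   kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid,
//                                      kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     kmp_int32 old_value = KMP_XCHG_FIXED32(lhs, rhs); // atomic exchange
//     return old_value;
//   }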
3199 
3200 // ------------------------------------------------------------------------
3201 #define CMPXCHG_SWP(TYPE, BITS) \
3202  { \
3203  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3204  TYPE old_value, new_value; \
3205  temp_val = *lhs; \
3206  old_value = temp_val; \
3207  new_value = rhs; \
3208  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3209  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3210  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3211  KMP_CPU_PAUSE(); \
3212  \
3213  temp_val = *lhs; \
3214  old_value = temp_val; \
3215  new_value = rhs; \
3216  } \
3217  return old_value; \
3218  }
3219 
3220 // -------------------------------------------------------------------------
3221 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3222  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3223  TYPE old_value; \
3224  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3225  CMPXCHG_SWP(TYPE, BITS) \
3226  }
3227 
3228 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3229 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3230 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3231 
3232 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3233  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3234 
3235 #if (KMP_ARCH_X86)
3236 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3237  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3238 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3239  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3240 #else
3241 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3242 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3243  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3244 #endif // (KMP_ARCH_X86)
3245 
3246 // ------------------------------------------------------------------------
3247 // Routines for Extended types: long double, _Quad, complex flavours (use
3248 // critical section)
3249 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3250  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3251  TYPE old_value; \
3252  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3253  CRITICAL_SWP(LCK_ID) \
3254  }
3255 
3256 // ------------------------------------------------------------------------
3257 // !!! TODO: check if we need to return void for cmplx4 routines
3258 // Workaround for cmplx4. Regular routines with a return value don't work
3259 // on Win_32e. Return the captured value through an additional parameter instead.
3260 
3261 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3262  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3263  TYPE rhs, TYPE *out) { \
3264  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3265  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3266 
3267 #define CRITICAL_SWP_WRK(LCK_ID) \
3268  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3269  \
3270  tmp = (*lhs); \
3271  (*lhs) = (rhs); \
3272  (*out) = tmp; \
3273  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3274  return;
3275 // ------------------------------------------------------------------------
3276 
3277 #ifdef KMP_GOMP_COMPAT
3278 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3279  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3280  KMP_CHECK_GTID; \
3281  CRITICAL_SWP_WRK(0); \
3282  }
3283 #else
3284 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3285 #endif /* KMP_GOMP_COMPAT */
3286 // ------------------------------------------------------------------------
3287 
3288 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3289  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3290  TYPE tmp; \
3291  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3292  CRITICAL_SWP_WRK(LCK_ID) \
3293  }
3294 // The end of workaround for cmplx4
3295 
3296 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3297 #if KMP_HAVE_QUAD
3298 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3299 #endif // KMP_HAVE_QUAD
3300 // cmplx4 routine to return void
3301 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
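// With TYPE_ID == cmplx4 and TYPE == kmp_cmplx32, ATOMIC_BEGIN_SWP_WRK above
// yields approximately
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
//                                 kmp_cmplx32 rhs, kmp_cmplx32 *out);
//
// so the swapped-out value is delivered through *out instead of a return
// value.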
3302 
3303 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3304 // __kmpc_atomic_cmplx4_swp
3305 
3306 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3307 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3308 #if KMP_HAVE_QUAD
3309 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3310 #if (KMP_ARCH_X86)
3311 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3312  1) // __kmpc_atomic_float16_a16_swp
3313 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3314  1) // __kmpc_atomic_cmplx16_a16_swp
3315 #endif // (KMP_ARCH_X86)
3316 #endif // KMP_HAVE_QUAD
3317 
3318 // End of OpenMP 4.0 Capture
3319 
3320 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3321 
3322 #undef OP_CRITICAL
3323 
3324 /* ------------------------------------------------------------------------ */
3325 /* Generic atomic routines */
3326 
3327 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3328  void (*f)(void *, void *, void *)) {
3329  KMP_DEBUG_ASSERT(__kmp_init_serial);
3330 
3331  if (
3332 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3333  FALSE /* must use lock */
3334 #else
3335  TRUE
3336 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3337  ) {
3338  kmp_int8 old_value, new_value;
3339 
3340  old_value = *(kmp_int8 *)lhs;
3341  (*f)(&new_value, &old_value, rhs);
3342 
3343  /* TODO: Should this be acquire or release? */
3344  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3345  *(kmp_int8 *)&new_value)) {
3346  KMP_CPU_PAUSE();
3347 
3348  old_value = *(kmp_int8 *)lhs;
3349  (*f)(&new_value, &old_value, rhs);
3350  }
3351 
3352  return;
3353  } else {
3354 // All 1-byte data is of integer data type.
3355 
3356 #ifdef KMP_GOMP_COMPAT
3357  if (__kmp_atomic_mode == 2) {
3358  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3359  } else
3360 #endif /* KMP_GOMP_COMPAT */
3361  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3362 
3363  (*f)(lhs, lhs, rhs);
3364 
3365 #ifdef KMP_GOMP_COMPAT
3366  if (__kmp_atomic_mode == 2) {
3367  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3368  } else
3369 #endif /* KMP_GOMP_COMPAT */
3370  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3371  }
3372 }
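// A hypothetical use of this generic entry point (names below are
// illustrative, not part of the runtime): the caller supplies an outlined
// combiner f(out, a, b) that stores (*a OP *b) into *out, matching the
// (*f)(&new_value, &old_value, rhs) calls above.
//
//   static void sub_i8(void *out, void *lhs, void *rhs) {
//     *(kmp_int8 *)out = (kmp_int8)(*(kmp_int8 *)lhs - *(kmp_int8 *)rhs);
//   }
//   ...
//   kmp_int8 x = 10, y = 3;
//   __kmpc_atomic_1(id_ref, gtid, &x, &y, sub_i8); // atomically performs x -= y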
3373 
3374 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3375  void (*f)(void *, void *, void *)) {
3376  if (
3377 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3378  FALSE /* must use lock */
3379 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3380  TRUE /* no alignment problems */
3381 #else
3382  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3383 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3384  ) {
3385  kmp_int16 old_value, new_value;
3386 
3387  old_value = *(kmp_int16 *)lhs;
3388  (*f)(&new_value, &old_value, rhs);
3389 
3390  /* TODO: Should this be acquire or release? */
3391  while (!KMP_COMPARE_AND_STORE_ACQ16(
3392  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3393  KMP_CPU_PAUSE();
3394 
3395  old_value = *(kmp_int16 *)lhs;
3396  (*f)(&new_value, &old_value, rhs);
3397  }
3398 
3399  return;
3400  } else {
3401 // All 2-byte data is of integer data type.
3402 
3403 #ifdef KMP_GOMP_COMPAT
3404  if (__kmp_atomic_mode == 2) {
3405  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3406  } else
3407 #endif /* KMP_GOMP_COMPAT */
3408  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3409 
3410  (*f)(lhs, lhs, rhs);
3411 
3412 #ifdef KMP_GOMP_COMPAT
3413  if (__kmp_atomic_mode == 2) {
3414  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3415  } else
3416 #endif /* KMP_GOMP_COMPAT */
3417  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3418  }
3419 }
3420 
3421 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3422  void (*f)(void *, void *, void *)) {
3423  KMP_DEBUG_ASSERT(__kmp_init_serial);
3424 
3425  if (
3426 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3427 // Gomp compatibility is broken if this routine is called for floats.
3428 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3429  TRUE /* no alignment problems */
3430 #else
3431  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3432 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3433  ) {
3434  kmp_int32 old_value, new_value;
3435 
3436  old_value = *(kmp_int32 *)lhs;
3437  (*f)(&new_value, &old_value, rhs);
3438 
3439  /* TODO: Should this be acquire or release? */
3440  while (!KMP_COMPARE_AND_STORE_ACQ32(
3441  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3442  KMP_CPU_PAUSE();
3443 
3444  old_value = *(kmp_int32 *)lhs;
3445  (*f)(&new_value, &old_value, rhs);
3446  }
3447 
3448  return;
3449  } else {
3450 // Use __kmp_atomic_lock_4i for all 4-byte data,
3451 // even if it isn't of integer data type.
3452 
3453 #ifdef KMP_GOMP_COMPAT
3454  if (__kmp_atomic_mode == 2) {
3455  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3456  } else
3457 #endif /* KMP_GOMP_COMPAT */
3458  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3459 
3460  (*f)(lhs, lhs, rhs);
3461 
3462 #ifdef KMP_GOMP_COMPAT
3463  if (__kmp_atomic_mode == 2) {
3464  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3465  } else
3466 #endif /* KMP_GOMP_COMPAT */
3467  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3468  }
3469 }
3470 
3471 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3472  void (*f)(void *, void *, void *)) {
3473  KMP_DEBUG_ASSERT(__kmp_init_serial);
3474  if (
3475 
3476 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3477  FALSE /* must use lock */
3478 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3479  TRUE /* no alignment problems */
3480 #else
3481  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3482 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3483  ) {
3484  kmp_int64 old_value, new_value;
3485 
3486  old_value = *(kmp_int64 *)lhs;
3487  (*f)(&new_value, &old_value, rhs);
3488  /* TODO: Should this be acquire or release? */
3489  while (!KMP_COMPARE_AND_STORE_ACQ64(
3490  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3491  KMP_CPU_PAUSE();
3492 
3493  old_value = *(kmp_int64 *)lhs;
3494  (*f)(&new_value, &old_value, rhs);
3495  }
3496 
3497  return;
3498  } else {
3499 // Use __kmp_atomic_lock_8i for all 8-byte data,
3500 // even if it isn't of integer data type.
3501 
3502 #ifdef KMP_GOMP_COMPAT
3503  if (__kmp_atomic_mode == 2) {
3504  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3505  } else
3506 #endif /* KMP_GOMP_COMPAT */
3507  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3508 
3509  (*f)(lhs, lhs, rhs);
3510 
3511 #ifdef KMP_GOMP_COMPAT
3512  if (__kmp_atomic_mode == 2) {
3513  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3514  } else
3515 #endif /* KMP_GOMP_COMPAT */
3516  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3517  }
3518 }
3519 
3520 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3521  void (*f)(void *, void *, void *)) {
3522  KMP_DEBUG_ASSERT(__kmp_init_serial);
3523 
3524 #ifdef KMP_GOMP_COMPAT
3525  if (__kmp_atomic_mode == 2) {
3526  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3527  } else
3528 #endif /* KMP_GOMP_COMPAT */
3529  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3530 
3531  (*f)(lhs, lhs, rhs);
3532 
3533 #ifdef KMP_GOMP_COMPAT
3534  if (__kmp_atomic_mode == 2) {
3535  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3536  } else
3537 #endif /* KMP_GOMP_COMPAT */
3538  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3539 }
3540 
3541 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3542  void (*f)(void *, void *, void *)) {
3543  KMP_DEBUG_ASSERT(__kmp_init_serial);
3544 
3545 #ifdef KMP_GOMP_COMPAT
3546  if (__kmp_atomic_mode == 2) {
3547  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3548  } else
3549 #endif /* KMP_GOMP_COMPAT */
3550  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3551 
3552  (*f)(lhs, lhs, rhs);
3553 
3554 #ifdef KMP_GOMP_COMPAT
3555  if (__kmp_atomic_mode == 2) {
3556  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3557  } else
3558 #endif /* KMP_GOMP_COMPAT */
3559  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3560 }
3561 
3562 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3563  void (*f)(void *, void *, void *)) {
3564  KMP_DEBUG_ASSERT(__kmp_init_serial);
3565 
3566 #ifdef KMP_GOMP_COMPAT
3567  if (__kmp_atomic_mode == 2) {
3568  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3569  } else
3570 #endif /* KMP_GOMP_COMPAT */
3571  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3572 
3573  (*f)(lhs, lhs, rhs);
3574 
3575 #ifdef KMP_GOMP_COMPAT
3576  if (__kmp_atomic_mode == 2) {
3577  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3578  } else
3579 #endif /* KMP_GOMP_COMPAT */
3580  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3581 }
3582 
3583 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3584  void (*f)(void *, void *, void *)) {
3585  KMP_DEBUG_ASSERT(__kmp_init_serial);
3586 
3587 #ifdef KMP_GOMP_COMPAT
3588  if (__kmp_atomic_mode == 2) {
3589  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3590  } else
3591 #endif /* KMP_GOMP_COMPAT */
3592  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3593 
3594  (*f)(lhs, lhs, rhs);
3595 
3596 #ifdef KMP_GOMP_COMPAT
3597  if (__kmp_atomic_mode == 2) {
3598  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3599  } else
3600 #endif /* KMP_GOMP_COMPAT */
3601  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3602 }
3603 
3604 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3605 // compiler; duplicated so as not to use third-party names in pure Intel code
3606 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3607 void __kmpc_atomic_start(void) {
3608  int gtid = __kmp_entry_gtid();
3609  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3610  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3611 }
3612 
3613 void __kmpc_atomic_end(void) {
3614  int gtid = __kmp_get_gtid();
3615  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3616  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3617 }
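// Hypothetical usage sketch: a compiler may bracket an atomic update that has
// no specialized entry point with these calls, e.g.
//
//   __kmpc_atomic_start();
//   x = user_defined_update(x); // any update with no dedicated routine
//   __kmpc_atomic_end();
//
// (user_defined_update is illustrative only.)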
3618 
3623 // end of file