stk-code_catmod/lib/simd_wrapper/simde/x86/avx512/ror.h
2022-04-29 11:02:25 +08:00

411 lines
14 KiB
C

#if !defined(SIMDE_X86_AVX512_ROR_H)
#define SIMDE_X86_AVX512_ROR_H
#include "types.h"
#include "mov.h"
#include "or.h"
#include "srli.h"
#include "slli.h"
#include "../avx2.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_ror_epi32(a, imm8) _mm_ror_epi32(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_ror_epi32 (simde__m128i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m128i_private
r_,
a_ = simde__m128i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
r_.altivec_i32 = vec_rl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8)));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31)));
break;
}
#else
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31)));
}
break;
}
#endif
return simde__m128i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_ror_epi32
#define _mm_ror_epi32(a, imm8) simde_mm_ror_epi32(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_mask_ror_epi32(src, k, a, imm8) _mm_mask_ror_epi32(src, k, a, imm8)
#else
#define simde_mm_mask_ror_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_mask_ror_epi32
#define _mm_mask_ror_epi32(src, k, a, imm8) simde_mm_mask_ror_epi32(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_maskz_ror_epi32(k, a, imm8) _mm_maskz_ror_epi32(k, a, imm8)
#else
#define simde_mm_maskz_ror_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_maskz_ror_epi32
#define _mm_maskz_ror_epi32(src, k, a, imm8) simde_mm_maskz_ror_epi32(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_ror_epi32(a, imm8) _mm256_ror_epi32(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_ror_epi32 (simde__m256i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m256i_private
r_,
a_ = simde__m256i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8)));
}
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31)));
break;
}
#else
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31)));
}
break;
}
#endif
return simde__m256i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_ror_epi32
#define _mm256_ror_epi32(a, imm8) simde_mm256_ror_epi32(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_mask_ror_epi32(src, k, a, imm8) _mm256_mask_ror_epi32(src, k, a, imm8)
#else
#define simde_mm256_mask_ror_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_mask_ror_epi32
#define _mm256_mask_ror_epi32(src, k, a, imm8) simde_mm256_mask_ror_epi32(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_maskz_ror_epi32(k, a, imm8) _mm256_maskz_ror_epi32(k, a, imm8)
#else
#define simde_mm256_maskz_ror_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_maskz_ror_epi32
#define _mm256_maskz_ror_epi32(k, a, imm8) simde_mm256_maskz_ror_epi32(k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_ror_epi32(a, imm8) _mm512_ror_epi32(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_ror_epi32 (simde__m512i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m512i_private
r_,
a_ = simde__m512i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8)));
}
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31)));
break;
}
#else
switch (imm8 & 31) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31)));
}
break;
}
#endif
return simde__m512i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_ror_epi32
#define _mm512_ror_epi32(a, imm8) simde_mm512_ror_epi32(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_mask_ror_epi32(src, k, a, imm8) _mm512_mask_ror_epi32(src, k, a, imm8)
#else
#define simde_mm512_mask_ror_epi32(src, k, a, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_mask_ror_epi32
#define _mm512_mask_ror_epi32(src, k, a, imm8) simde_mm512_mask_ror_epi32(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_maskz_ror_epi32(k, a, imm8) _mm512_maskz_ror_epi32(k, a, imm8)
#else
#define simde_mm512_maskz_ror_epi32(k, a, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_ror_epi32(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_maskz_ror_epi32
#define _mm512_maskz_ror_epi32(k, a, imm8) simde_mm512_maskz_ror_epi32(k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_ror_epi64(a, imm8) _mm_ror_epi64(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_ror_epi64 (simde__m128i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m128i_private
r_,
a_ = simde__m128i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
r_.altivec_i64 = vec_rl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8)));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63)));
break;
}
#else
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63)));
}
break;
}
#endif
return simde__m128i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_ror_epi64
#define _mm_ror_epi64(a, imm8) simde_mm_ror_epi64(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_mask_ror_epi64(src, k, a, imm8) _mm_mask_ror_epi64(src, k, a, imm8)
#else
#define simde_mm_mask_ror_epi64(src, k, a, imm8) simde_mm_mask_mov_epi64(src, k, simde_mm_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_mask_ror_epi64
#define _mm_mask_ror_epi64(src, k, a, imm8) simde_mm_mask_ror_epi64(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm_maskz_ror_epi64(k, a, imm8) _mm_maskz_ror_epi64(k, a, imm8)
#else
#define simde_mm_maskz_ror_epi64(k, a, imm8) simde_mm_maskz_mov_epi64(k, simde_mm_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm_maskz_ror_epi64
#define _mm_maskz_ror_epi64(k, a, imm8) simde_mm_maskz_ror_epi64(k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_ror_epi64(a, imm8) _mm256_ror_epi64(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_ror_epi64 (simde__m256i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m256i_private
r_,
a_ = simde__m256i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8)));
}
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63)));
break;
}
#else
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63)));
}
break;
}
#endif
return simde__m256i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_ror_epi64
#define _mm256_ror_epi64(a, imm8) simde_mm256_ror_epi64(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_mask_ror_epi64(src, k, a, imm8) _mm256_mask_ror_epi64(src, k, a, imm8)
#else
#define simde_mm256_mask_ror_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_mask_ror_epi64
#define _mm256_mask_ror_epi64(src, k, a, imm8) simde_mm256_mask_ror_epi64(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#define simde_mm256_maskz_ror_epi64(k, a, imm8) _mm256_maskz_ror_epi64(k, a, imm8)
#else
#define simde_mm256_maskz_ror_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_maskz_ror_epi64
#define _mm256_maskz_ror_epi64(k, a, imm8) simde_mm256_maskz_ror_epi64(k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_ror_epi64(a, imm8) _mm512_ror_epi64(a, imm8)
#else
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_ror_epi64 (simde__m512i a, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) {
simde__m512i_private
r_,
a_ = simde__m512i_to_private(a);
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8)));
}
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63)));
break;
}
#else
switch (imm8 & 63) {
case 0:
r_ = a_;
break;
default:
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63)));
}
break;
}
#endif
return simde__m512i_from_private(r_);
}
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_ror_epi64
#define _mm512_ror_epi64(a, imm8) simde_mm512_ror_epi64(a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_mask_ror_epi64(src, k, a, imm8) _mm512_mask_ror_epi64(src, k, a, imm8)
#else
#define simde_mm512_mask_ror_epi64(src, k, a, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_mask_ror_epi64
#define _mm512_mask_ror_epi64(src, k, a, imm8) simde_mm512_mask_ror_epi64(src, k, a, imm8)
#endif
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define simde_mm512_maskz_ror_epi64(k, a, imm8) _mm512_maskz_ror_epi64(k, a, imm8)
#else
#define simde_mm512_maskz_ror_epi64(k, a, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_ror_epi64(a, imm8))
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_maskz_ror_epi64
#define _mm512_maskz_ror_epi64(k, a, imm8) simde_mm512_maskz_ror_epi64(k, a, imm8)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_X86_AVX512_ROR_H) */