stk-code_catmod/lib/simd_wrapper/simde/wasm/simd128.h
2022-04-29 11:02:25 +08:00

8917 lines
297 KiB
C

/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_WASM_SIMD128_H)
#define SIMDE_WASM_SIMD128_H
#include "../simde-common.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
/* Private, lane-addressable view of a 128-bit vector.
 *
 * Every member of this union overlays the same storage, so a value
 * written through one member can be read back as lanes of another
 * width, or as a platform-native vector type when one of the *_NATIVE
 * macros below is defined. */
typedef union {
#if defined(SIMDE_VECTOR_SUBSCRIPT)
/* Members declared with SIMDE_VECTOR(16) when SIMDE_VECTOR_SUBSCRIPT is available. */
SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#if defined(SIMDE_HAVE_INT128_)
SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#endif
SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#else
/* Plain arrays: lane count times lane size is 16 bytes for each member. */
SIMDE_ALIGN_TO_16 int8_t i8[16];
SIMDE_ALIGN_TO_16 int16_t i16[8];
SIMDE_ALIGN_TO_16 int32_t i32[4];
SIMDE_ALIGN_TO_16 int64_t i64[2];
SIMDE_ALIGN_TO_16 uint8_t u8[16];
SIMDE_ALIGN_TO_16 uint16_t u16[8];
SIMDE_ALIGN_TO_16 uint32_t u32[4];
SIMDE_ALIGN_TO_16 uint64_t u64[2];
#if defined(SIMDE_HAVE_INT128_)
SIMDE_ALIGN_TO_16 simde_int128 i128[1];
SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
#endif
SIMDE_ALIGN_TO_16 simde_float32 f32[4];
SIMDE_ALIGN_TO_16 simde_float64 f64[2];
/* The "fast" integer types have a platform-dependent width, so the
 * lane count is derived from their size. */
SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
#endif
/* Platform-native views; at most one family is compiled in. */
#if defined(SIMDE_X86_SSE_NATIVE)
SIMDE_ALIGN_TO_16 __m128 sse_m128;
#if defined(SIMDE_X86_SSE2_NATIVE)
SIMDE_ALIGN_TO_16 __m128i sse_m128i;
SIMDE_ALIGN_TO_16 __m128d sse_m128d;
#endif
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* The float64x2_t view is only declared for the A64V8 target. */
SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
SIMDE_ALIGN_TO_16 v128_t wasm_v128;
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
/* 64-bit integer and double views need P7 (or z/Arch vector). */
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
#endif
#endif
} simde_v128_private;
/* Public vector type.  The first matching branch wins: the native WASM
 * type, then a NEON / SSE2 / SSE / AltiVec (or z/Arch) native type,
 * then a compiler vector-extension type, and finally the private union
 * itself. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
typedef v128_t simde_v128_t;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
typedef int32x4_t simde_v128_t;
#elif defined(SIMDE_X86_SSE2_NATIVE)
typedef __m128i simde_v128_t;
#elif defined(SIMDE_X86_SSE_NATIVE)
typedef __m128 simde_v128_t;
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_v128_t;
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
typedef int32_t simde_v128_t SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#else
typedef simde_v128_private simde_v128_t;
#endif
/* Optionally expose the type under its un-prefixed WASM name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
typedef simde_v128_t v128_t;
#endif
/* Both the public and the private type must be exactly 16 bytes; the
 * memcpy-based conversions between them depend on it. */
HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_t), "simde_v128_t size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_private), "simde_v128_private size incorrect");
/* Alignment is only verified when the build asks for it and an
 * alignof facility is available. */
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_t) == 16, "simde_v128_t is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_private) == 16, "simde_v128_private is not 16-byte aligned");
#endif
/* Generate a pair of conversion functions between two types.
 *
 *   Other_Type To_Name(SIMDe_Type v)    -- SIMDe_Type to Other_Type
 *   SIMDe_Type From_Name(Other_Type v)  -- Other_Type to SIMDe_Type
 *
 * Each function copies sizeof(destination) bytes from the argument
 * into a local of the destination type with simde_memcpy and returns
 * it; no per-lane value conversion takes place.  The macro itself does
 * not check that the two types have the same size. */
#define SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(Other_Type, SIMDe_Type, To_Name, From_Name) \
SIMDE_FUNCTION_ATTRIBUTES \
Other_Type To_Name(SIMDe_Type v) { \
Other_Type r; \
simde_memcpy(&r, &v, sizeof(r)); \
return r; \
} \
\
SIMDE_FUNCTION_ATTRIBUTES \
SIMDe_Type From_Name(Other_Type v) { \
SIMDe_Type r; \
simde_memcpy(&r, &v, sizeof(r)); \
return r; \
}
/* simde_v128_t <-> simde_v128_private; always available. */
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(simde_v128_private, simde_v128_t, simde_v128_to_private, simde_v128_from_private)
/* simde_v128_t <-> x86 vector types.  Note that all three pairs,
 * including the __m128 one, are only generated when SSE2 is available. */
#if defined(SIMDE_X86_SSE2_NATIVE)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128 , simde_v128_t, simde_v128_to_m128 , simde_v128_from_m128 )
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128i, simde_v128_t, simde_v128_to_m128i, simde_v128_from_m128i)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128d, simde_v128_t, simde_v128_to_m128d, simde_v128_from_m128d)
#endif
/* simde_v128_t <-> NEON vector types. */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int8x16_t, simde_v128_t, simde_v128_to_neon_i8 , simde_v128_from_neon_i8 )
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int16x8_t, simde_v128_t, simde_v128_to_neon_i16, simde_v128_from_neon_i16)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int32x4_t, simde_v128_t, simde_v128_to_neon_i32, simde_v128_from_neon_i32)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int64x2_t, simde_v128_t, simde_v128_to_neon_i64, simde_v128_from_neon_i64)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint8x16_t, simde_v128_t, simde_v128_to_neon_u8 , simde_v128_from_neon_u8 )
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint16x8_t, simde_v128_t, simde_v128_to_neon_u16, simde_v128_from_neon_u16)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint32x4_t, simde_v128_t, simde_v128_to_neon_u32, simde_v128_from_neon_u32)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint64x2_t, simde_v128_t, simde_v128_to_neon_u64, simde_v128_from_neon_u64)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float32x4_t, simde_v128_t, simde_v128_to_neon_f32, simde_v128_from_neon_f32)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float64x2_t, simde_v128_t, simde_v128_to_neon_f64, simde_v128_from_neon_f64)
#endif
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
/* simde_v128_t <-> AltiVec / z/Arch vector types. */
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed char), simde_v128_t, simde_v128_to_altivec_i8 , simde_v128_from_altivec_i8 )
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed short), simde_v128_t, simde_v128_to_altivec_i16, simde_v128_from_altivec_i16)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed int), simde_v128_t, simde_v128_to_altivec_i32, simde_v128_from_altivec_i32)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), simde_v128_t, simde_v128_to_altivec_u8 , simde_v128_from_altivec_u8 )
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), simde_v128_t, simde_v128_to_altivec_u16, simde_v128_from_altivec_u16)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), simde_v128_t, simde_v128_to_altivec_u32, simde_v128_from_altivec_u32)
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed long long), simde_v128_t, simde_v128_to_altivec_i64, simde_v128_from_altivec_i64)
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), simde_v128_t, simde_v128_to_altivec_u64, simde_v128_from_altivec_u64)
#endif
/* When SIMDE_BUG_GCC_95782 is defined, the float conversions are written
 * by hand and go through the private union's altivec_f32 member instead
 * of the memcpy-based generated functions.
 * NOTE(review): presumably a workaround for the GCC bug named by the
 * macro -- confirm against where SIMDE_BUG_GCC_95782 is defined. */
#if defined(SIMDE_BUG_GCC_95782)
SIMDE_FUNCTION_ATTRIBUTES
SIMDE_POWER_ALTIVEC_VECTOR(float)
simde_v128_to_altivec_f32(simde_v128_t value) {
simde_v128_private r_ = simde_v128_to_private(value);
return r_.altivec_f32;
}
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_v128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) {
simde_v128_private r_;
r_.altivec_f32 = value;
return simde_v128_from_private(r_);
}
#else
SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(float), simde_v128_t, simde_v128_to_altivec_f32, simde_v128_from_altivec_f32)
#endif
#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */
/*
* Begin function implementations
*/
/* load */
/* Read one full vector (sizeof(simde_v128_t) bytes) starting at `mem`.
 * The SSE2 path uses the unaligned-load intrinsic and the portable
 * path copies byte-wise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load(mem);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_si128(HEDLEY_REINTERPRET_CAST(const __m128i*, mem));
  #else
    simde_v128_t loaded;
    simde_memcpy(&loaded, mem, sizeof(simde_v128_t));
    return loaded;
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load(mem) simde_wasm_v128_load((mem))
#endif
/* store */
/* Write the whole vector `a` (sizeof(simde_v128_t) bytes) to `mem`.
 * The SSE2 path uses the unaligned-store intrinsic and the portable
 * path copies byte-wise. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_wasm_v128_store (void * mem, simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    wasm_v128_store(mem, a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    _mm_storeu_si128(HEDLEY_REINTERPRET_CAST(__m128i*, mem), a);
  #else
    simde_memcpy(mem, &a, sizeof(simde_v128_t));
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_store(mem, a) simde_wasm_v128_store((mem), (a))
#endif
/* make */
/* Assemble a vector from sixteen 8-bit lanes; cN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_make (
    int8_t c0, int8_t c1, int8_t  c2, int8_t  c3, int8_t  c4, int8_t  c5, int8_t  c6, int8_t  c7,
    int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_make(
      c0, c1,  c2,  c3,  c4,  c5,  c6,  c7,
      c8, c9, c10, c11, c12, c13, c14, c15);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_setr_epi8(
      c0, c1,  c2,  c3,  c4,  c5,  c6,  c7,
      c8, c9, c10, c11, c12, c13, c14, c15);
  #else
    /* Gather the arguments in lane order, then copy them lane by lane. */
    const int8_t lanes[16] = {
      c0, c1,  c2,  c3,  c4,  c5,  c6,  c7,
      c8, c9, c10, c11, c12, c13, c14, c15
    };
    simde_v128_private r_;

    for (size_t i = 0 ; i < (sizeof(lanes) / sizeof(lanes[0])) ; i++) {
      r_.i8[i] = lanes[i];
    }

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_make( \
      c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
      c8, c9, c10, c11, c12, c13, c14, c15) \
    simde_wasm_i8x16_make( \
      (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \
      (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15))
#endif
/* Assemble a vector from eight 16-bit lanes; cN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_make (
    int16_t c0, int16_t c1, int16_t c2, int16_t c3,
    int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_setr_epi16(c0, c1, c2, c3, c4, c5, c6, c7);
  #else
    /* Gather the arguments in lane order, then copy them lane by lane. */
    const int16_t lanes[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };
    simde_v128_private r_;

    for (size_t i = 0 ; i < (sizeof(lanes) / sizeof(lanes[0])) ; i++) {
      r_.i16[i] = lanes[i];
    }

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7) \
    simde_wasm_i16x8_make((c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7))
#endif
/* Assemble a vector from four 32-bit lanes; cN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_make (int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_make(c0, c1, c2, c3);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_setr_epi32(c0, c1, c2, c3);
  #else
    simde_v128_private out_;

    out_.i32[0] = c0;
    out_.i32[1] = c1;
    out_.i32[2] = c2;
    out_.i32[3] = c3;

    return simde_v128_from_private(out_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_make(c0, c1, c2, c3) simde_wasm_i32x4_make((c0), (c1), (c2), (c3))
#endif
/* Assemble a vector from two 64-bit lanes; c0 is lane 0 and c1 is lane 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_make (int64_t c0, int64_t c1) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_make(c0, c1);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* _mm_set_epi64x takes the high element first, hence the swap. */
    return _mm_set_epi64x(c1, c0);
  #else
    simde_v128_private out_;

    out_.i64[0] = c0;
    out_.i64[1] = c1;

    return simde_v128_from_private(out_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_make(c0, c1) simde_wasm_i64x2_make((c0), (c1))
#endif
/* Assemble a vector from four single-precision lanes; cN is stored in
 * lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_make (simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_make(c0, c1, c2, c3);
  #else
    simde_v128_private out_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      out_.sse_m128 = _mm_setr_ps(c0, c1, c2, c3);
    #else
      out_.f32[0] = c0;
      out_.f32[1] = c1;
      out_.f32[2] = c2;
      out_.f32[3] = c3;
    #endif

    return simde_v128_from_private(out_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_make(c0, c1, c2, c3) simde_wasm_f32x4_make((c0), (c1), (c2), (c3))
#endif
/* Assemble a vector from two double-precision lanes; c0 is lane 0 and
 * c1 is lane 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_make (simde_float64 c0, simde_float64 c1) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_make(c0, c1);
  #else
    simde_v128_private out_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* _mm_set_pd takes the high element first, hence the swap. */
      out_.sse_m128d = _mm_set_pd(c1, c0);
    #else
      out_.f64[0] = c0;
      out_.f64[1] = c1;
    #endif

    return simde_v128_from_private(out_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_make(c0, c1) simde_wasm_f64x2_make((c0), (c1))
#endif
/* const */
/* simde_wasm_i8x16_const: build an i8x16 vector from sixteen lane
 * values that are expected to be compile-time constants.
 *
 * One of three implementations is selected by the preprocessor:
 *   - native WASM: a macro forwarding to wasm_i8x16_const;
 *   - statement expressions and SIMDE_ASSERT_CONSTANT_ available: a
 *     macro which passes every argument through SIMDE_ASSERT_CONSTANT_
 *     and then calls simde_wasm_i8x16_make;
 *   - otherwise: a plain function wrapping simde_wasm_i8x16_make, with
 *     no constness check.
 *
 * Every one of c0..c15 must be checked exactly once in the second
 * variant, including c14. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define \
    simde_wasm_i8x16_const( \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      wasm_i8x16_const( \
        (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \
        (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define \
    simde_wasm_i8x16_const( \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      SIMDE_STATEMENT_EXPR_(({ \
        SIMDE_ASSERT_CONSTANT_(c0); \
        SIMDE_ASSERT_CONSTANT_(c1); \
        SIMDE_ASSERT_CONSTANT_(c2); \
        SIMDE_ASSERT_CONSTANT_(c3); \
        SIMDE_ASSERT_CONSTANT_(c4); \
        SIMDE_ASSERT_CONSTANT_(c5); \
        SIMDE_ASSERT_CONSTANT_(c6); \
        SIMDE_ASSERT_CONSTANT_(c7); \
        SIMDE_ASSERT_CONSTANT_(c8); \
        SIMDE_ASSERT_CONSTANT_(c9); \
        SIMDE_ASSERT_CONSTANT_(c10); \
        SIMDE_ASSERT_CONSTANT_(c11); \
        SIMDE_ASSERT_CONSTANT_(c12); \
        SIMDE_ASSERT_CONSTANT_(c13); \
        SIMDE_ASSERT_CONSTANT_(c14); \
        SIMDE_ASSERT_CONSTANT_(c15); \
        \
        simde_wasm_i8x16_make( \
          c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
          c8, c9, c10, c11, c12, c13, c14, c15); \
      }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_i8x16_const (
      int8_t c0, int8_t c1, int8_t  c2, int8_t  c3, int8_t  c4, int8_t  c5, int8_t  c6, int8_t  c7,
      int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
    return simde_wasm_i8x16_make(
      c0, c1,  c2,  c3,  c4,  c5,  c6,  c7,
      c8, c9, c10, c11, c12, c13, c14, c15);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define \
    wasm_i8x16_const( \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      simde_wasm_i8x16_const( \
        (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \
        (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15))
#endif
/* simde_wasm_i16x8_const: build an i16x8 vector from eight lane values
 * that are expected to be compile-time constants.  Native WASM
 * forwards to wasm_i16x8_const; with statement expressions each
 * argument is passed through SIMDE_ASSERT_CONSTANT_ before calling
 * simde_wasm_i16x8_make; otherwise a plain wrapper function is used. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7) \
    wasm_i16x8_const((c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define simde_wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7) \
    SIMDE_STATEMENT_EXPR_(({ \
      SIMDE_ASSERT_CONSTANT_(c0); \
      SIMDE_ASSERT_CONSTANT_(c1); \
      SIMDE_ASSERT_CONSTANT_(c2); \
      SIMDE_ASSERT_CONSTANT_(c3); \
      SIMDE_ASSERT_CONSTANT_(c4); \
      SIMDE_ASSERT_CONSTANT_(c5); \
      SIMDE_ASSERT_CONSTANT_(c6); \
      SIMDE_ASSERT_CONSTANT_(c7); \
      \
      simde_wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7); \
    }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_i16x8_const (
      int16_t c0, int16_t c1, int16_t c2, int16_t c3,
      int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
    return simde_wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7) \
    simde_wasm_i16x8_const((c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7))
#endif
/* simde_wasm_i32x4_const: build an i32x4 vector from four lane values
 * that are expected to be compile-time constants.  Native WASM
 * forwards to wasm_i32x4_const; with statement expressions each
 * argument is passed through SIMDE_ASSERT_CONSTANT_ before calling
 * simde_wasm_i32x4_make; otherwise a plain wrapper function is used. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i32x4_const(c0, c1, c2, c3) \
    wasm_i32x4_const((c0), (c1), (c2), (c3))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define simde_wasm_i32x4_const(c0, c1, c2, c3) \
    SIMDE_STATEMENT_EXPR_(({ \
      SIMDE_ASSERT_CONSTANT_(c0); \
      SIMDE_ASSERT_CONSTANT_(c1); \
      SIMDE_ASSERT_CONSTANT_(c2); \
      SIMDE_ASSERT_CONSTANT_(c3); \
      \
      simde_wasm_i32x4_make(c0, c1, c2, c3); \
    }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_i32x4_const (int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
    return simde_wasm_i32x4_make(c0, c1, c2, c3);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_const(c0, c1, c2, c3) \
    simde_wasm_i32x4_const((c0), (c1), (c2), (c3))
#endif
/* simde_wasm_i64x2_const: build an i64x2 vector from two lane values
 * that are expected to be compile-time constants.  Native WASM
 * forwards to wasm_i64x2_const; with statement expressions each
 * argument is passed through SIMDE_ASSERT_CONSTANT_ before calling
 * simde_wasm_i64x2_make; otherwise a plain wrapper function is used. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i64x2_const(c0, c1) \
    wasm_i64x2_const((c0), (c1))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define simde_wasm_i64x2_const(c0, c1) \
    SIMDE_STATEMENT_EXPR_(({ \
      SIMDE_ASSERT_CONSTANT_(c0); \
      SIMDE_ASSERT_CONSTANT_(c1); \
      \
      simde_wasm_i64x2_make(c0, c1); \
    }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_i64x2_const (int64_t c0, int64_t c1) {
    return simde_wasm_i64x2_make(c0, c1);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_const(c0, c1) \
    simde_wasm_i64x2_const((c0), (c1))
#endif
/* simde_wasm_f32x4_const: build an f32x4 vector from four lane values
 * that are expected to be compile-time constants.  Native WASM
 * forwards to wasm_f32x4_const; with statement expressions each
 * argument is passed through SIMDE_ASSERT_CONSTANT_ before calling
 * simde_wasm_f32x4_make; otherwise a plain wrapper function is used. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f32x4_const(c0, c1, c2, c3) \
    wasm_f32x4_const((c0), (c1), (c2), (c3))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define simde_wasm_f32x4_const(c0, c1, c2, c3) \
    SIMDE_STATEMENT_EXPR_(({ \
      SIMDE_ASSERT_CONSTANT_(c0); \
      SIMDE_ASSERT_CONSTANT_(c1); \
      SIMDE_ASSERT_CONSTANT_(c2); \
      SIMDE_ASSERT_CONSTANT_(c3); \
      \
      simde_wasm_f32x4_make(c0, c1, c2, c3); \
    }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_f32x4_const (
      simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) {
    return simde_wasm_f32x4_make(c0, c1, c2, c3);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_const(c0, c1, c2, c3) \
    simde_wasm_f32x4_const((c0), (c1), (c2), (c3))
#endif
/* simde_wasm_f64x2_const: build an f64x2 vector from two lane values
 * that are expected to be compile-time constants.  Native WASM
 * forwards to wasm_f64x2_const; with statement expressions each
 * argument is passed through SIMDE_ASSERT_CONSTANT_ before calling
 * simde_wasm_f64x2_make; otherwise a plain wrapper function is used. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f64x2_const(c0, c1) \
    wasm_f64x2_const((c0), (c1))
#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)
  #define simde_wasm_f64x2_const(c0, c1) \
    SIMDE_STATEMENT_EXPR_(({ \
      SIMDE_ASSERT_CONSTANT_(c0); \
      SIMDE_ASSERT_CONSTANT_(c1); \
      \
      simde_wasm_f64x2_make(c0, c1); \
    }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde_v128_t
  simde_wasm_f64x2_const (simde_float64 c0, simde_float64 c1) {
    return simde_wasm_f64x2_make(c0, c1);
  }
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_const(c0, c1) \
    simde_wasm_f64x2_const((c0), (c1))
#endif
/* splat */
/* Return a vector with every 8-bit lane set to `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_splat (int8_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      filled_.sse_m128i = _mm_set1_epi8(a);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      filled_.neon_i8 = vdupq_n_s8(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      filled_.altivec_i8 = vec_splats(a);
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.i8) / sizeof(filled_.i8[0])) ; lane++) {
        filled_.i8[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_splat(a) simde_wasm_i8x16_splat((a))
#endif
/* Return a vector with every 16-bit lane set to `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_splat (int16_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      filled_.sse_m128i = _mm_set1_epi16(a);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      filled_.neon_i16 = vdupq_n_s16(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      filled_.altivec_i16 = vec_splats(a);
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.i16) / sizeof(filled_.i16[0])) ; lane++) {
        filled_.i16[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_splat(a) simde_wasm_i16x8_splat((a))
#endif
/* Return a vector with every 32-bit lane set to `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_splat (int32_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      filled_.sse_m128i = _mm_set1_epi32(a);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      filled_.neon_i32 = vdupq_n_s32(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      filled_.altivec_i32 = vec_splats(a);
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.i32) / sizeof(filled_.i32[0])) ; lane++) {
        filled_.i32[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_splat(a) simde_wasm_i32x4_splat((a))
#endif
/* Return a vector with both 64-bit lanes set to `a`.  The SSE2 path is
 * skipped on MSVC versions older than 19.0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_splat (int64_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
      filled_.sse_m128i = _mm_set1_epi64x(a);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      filled_.neon_i64 = vdupq_n_s64(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      filled_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a));
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.i64) / sizeof(filled_.i64[0])) ; lane++) {
        filled_.i64[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_splat(a) simde_wasm_i64x2_splat((a))
#endif
/* Return a vector with every single-precision lane set to `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_splat (simde_float32 a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      filled_.sse_m128 = _mm_set1_ps(a);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      filled_.neon_f32 = vdupq_n_f32(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
      filled_.altivec_f32 = vec_splats(a);
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.f32) / sizeof(filled_.f32[0])) ; lane++) {
        filled_.f32[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_splat(a) simde_wasm_f32x4_splat((a))
#endif
/* Return a vector with both double-precision lanes set to `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_splat (simde_float64 a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_splat(a);
  #else
    simde_v128_private filled_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      filled_.sse_m128d = _mm_set1_pd(a);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      filled_.neon_f64 = vdupq_n_f64(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      filled_.altivec_f64 = vec_splats(a);
    #else
      /* Portable path: write the scalar into each lane in turn. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(filled_.f64) / sizeof(filled_.f64[0])) ; lane++) {
        filled_.f64[lane] = a;
      }
    #endif

    return simde_v128_from_private(filled_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_splat(a) simde_wasm_f64x2_splat((a))
#endif
/* load_splat */
/* Read one 8-bit value from `mem` and broadcast it to every i8 lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load8_splat (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load8_splat(mem);
  #else
    int8_t scalar;
    simde_memcpy(&scalar, mem, sizeof(scalar));
    return simde_wasm_i8x16_splat(scalar);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load8_splat(mem) simde_wasm_v128_load8_splat((mem))
#endif
/* Read one 16-bit value from `mem` and broadcast it to every i16 lane.
 * The portable path copies the value byte-wise out of `mem`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load16_splat (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load16_splat(mem);
  #else
    int16_t scalar;
    simde_memcpy(&scalar, mem, sizeof(scalar));
    return simde_wasm_i16x8_splat(scalar);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load16_splat(mem) simde_wasm_v128_load16_splat((mem))
#endif
/* Read one 32-bit value from `mem` and broadcast it to every i32 lane.
 * The portable path copies the value byte-wise out of `mem`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load32_splat (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load32_splat(mem);
  #else
    int32_t scalar;
    simde_memcpy(&scalar, mem, sizeof(scalar));
    return simde_wasm_i32x4_splat(scalar);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load32_splat(mem) simde_wasm_v128_load32_splat((mem))
#endif
/* Read one 64-bit value from `mem` and broadcast it to both i64 lanes.
 * The portable path copies the value byte-wise out of `mem`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load64_splat (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load64_splat(mem);
  #else
    int64_t scalar;
    simde_memcpy(&scalar, mem, sizeof(scalar));
    return simde_wasm_i64x2_splat(scalar);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load64_splat(mem) simde_wasm_v128_load64_splat((mem))
#endif
/* extract_lane
*
* Note that, unlike normal WASM SIMD128 we return intN_t instead of
* int for sizeof(X) <= sizeof(int). This is done for portability;
* the regular API doesn't have to worry about things like int being
* 16 bits (like on AVR).
*
* This does mean that code which works in SIMDe may not work without
* changes on WASM, but luckily the necessary changes (i.e., casting
* the return values to smaller type when assigning to the smaller
* type) mean the code will work in *both* SIMDe and a native
* implementation. If you use the simde_* prefixed functions it will
* always work. */
/* Return signed 8-bit lane `lane & 15` of `a`.  On targets with a
 * suitable intrinsic the function is shadowed by a macro of the same
 * name defined right after it. */
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_wasm_i8x16_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 15;
  return lanes_.i8[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, wasm_i8x16_extract_lane((a), (lane)))
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
  #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(simde_v128_to_m128i(a), (lane) & 15))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_wasm_i8x16_extract_lane(a, lane) vgetq_lane_s8(simde_v128_to_neon_i8(a), (lane) & 15)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_extract_lane(a, lane) simde_wasm_i8x16_extract_lane((a), (lane))
#endif
/* Return signed 16-bit lane `lane & 7` of `a`.  On targets with a
 * suitable intrinsic the function is shadowed by a macro of the same
 * name defined right after it. */
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_wasm_i16x8_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 7;
  return lanes_.i16[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane((a), (lane)))
#elif defined(SIMDE_X86_SSE2_NATIVE)
  #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, _mm_extract_epi16((a), (lane) & 7))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i16x8_extract_lane(a, lane) vgetq_lane_s16(simde_v128_to_neon_i16(a), (lane) & 7)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_extract_lane(a, lane) simde_wasm_i16x8_extract_lane((a), (lane))
#endif
/* Return signed 32-bit lane `lane & 3` of `a`.  On targets with a
 * suitable intrinsic the function is shadowed by a macro of the same
 * name defined right after it. */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_wasm_i32x4_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 3;
  return lanes_.i32[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane((a), (lane)))
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
  #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, _mm_extract_epi32((a), (lane) & 3))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i32x4_extract_lane(a, lane) vgetq_lane_s32(simde_v128_to_neon_i32(a), (lane) & 3)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extract_lane(a, lane) simde_wasm_i32x4_extract_lane((a), (lane))
#endif
/* Return signed 64-bit lane `lane & 1` of `a`.  On targets with a
 * suitable intrinsic the function is shadowed by a macro of the same
 * name defined right after it; the SSE4.1 override is limited to
 * AMD64. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_wasm_i64x2_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 1;
  return lanes_.i64[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane((a), (lane)))
#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, _mm_extract_epi64((a), (lane) & 1))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i64x2_extract_lane(a, lane) vgetq_lane_s64(simde_v128_to_neon_i64(a), (lane) & 1)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_extract_lane(a, lane) simde_wasm_i64x2_extract_lane((a), (lane))
#endif
/* Return unsigned 8-bit lane `lane & 15` of `a`.  On native WASM and
 * NEON the function is shadowed by a macro of the same name defined
 * right after it. */
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_wasm_u8x16_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 15;
  return lanes_.u8[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_u8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint8_t, wasm_u8x16_extract_lane((a), (lane)))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_wasm_u8x16_extract_lane(a, lane) vgetq_lane_u8(simde_v128_to_neon_u8(a), (lane) & 15)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_extract_lane(a, lane) simde_wasm_u8x16_extract_lane((a), (lane))
#endif
/* Return unsigned 16-bit lane `lane & 7` of `a`.  On native WASM and
 * NEON the function is shadowed by a macro of the same name defined
 * right after it. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_wasm_u16x8_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private lanes_ = simde_v128_to_private(a);
  const int idx = lane & 7;
  return lanes_.u16[idx];
}
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_u16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint16_t, wasm_u16x8_extract_lane((a), (lane)))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_u16x8_extract_lane(a, lane) vgetq_lane_u16(simde_v128_to_neon_u16(a), (lane) & 7)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extract_lane(a, lane) simde_wasm_u16x8_extract_lane((a), (lane))
#endif
/* Portable fallback: return one 32-bit float lane of `a`.
 * The lane index is masked with 3 before it is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32
simde_wasm_f32x4_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private a_ = simde_v128_to_private(a);
  return a_.f32[lane & 3];
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above.
 *
 * There is deliberately no SSE4.1 override here: _mm_extract_ps yields the
 * lane's raw bits as an int rather than a float value, so it cannot be
 * used as a drop-in replacement.  x86 builds use the portable function. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f32x4_extract_lane(a, lane) wasm_f32x4_extract_lane((a), (lane))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_f32x4_extract_lane(a, lane) vgetq_lane_f32(simde_v128_to_neon_f32(a), (lane) & 3)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_extract_lane(a, lane) simde_wasm_f32x4_extract_lane((a), (lane))
#endif
/* Portable fallback: return one 64-bit float lane of `a`.
 * The lane index is masked with 1, so only its lowest bit is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64
simde_wasm_f64x2_extract_lane (simde_v128_t a, const int lane) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 1;

  return vec.f64[idx];
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f64x2_extract_lane(a, lane) wasm_f64x2_extract_lane((a), (lane))
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_f64x2_extract_lane(a, lane) vgetq_lane_f64(simde_v128_to_neon_f64(a), (lane) & 1)
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_extract_lane(a, lane) simde_wasm_f64x2_extract_lane((a), (lane))
#endif
/* replace_lane */
/* Portable fallback: return a copy of `a` in which one 8-bit lane has been
 * overwritten with `value`.  The lane index is masked with 15. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_replace_lane (simde_v128_t a, const int lane, int8_t value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 15;

  vec.i8[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i8x16_replace_lane(a, lane, value) wasm_i8x16_replace_lane((a), (lane), (value))
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)
    #define simde_wasm_i8x16_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi8((a), (value), (lane) & 15))
  #else
    #define simde_wasm_i8x16_replace_lane(a, lane, value) _mm_insert_epi8((a), (value), (lane) & 15)
  #endif
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_wasm_i8x16_replace_lane(a, lane, value) simde_v128_from_neon_i8(vsetq_lane_s8((value), simde_v128_to_neon_i8(a), (lane) & 15))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_replace_lane(a, lane, value) simde_wasm_i8x16_replace_lane((a), (lane), (value))
#endif
/* Portable fallback: return a copy of `a` in which one 16-bit lane has been
 * overwritten with `value`.  The lane index is masked with 7. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_replace_lane (simde_v128_t a, const int lane, int16_t value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 7;

  vec.i16[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i16x8_replace_lane(a, lane, value) wasm_i16x8_replace_lane((a), (lane), (value))
#elif defined(SIMDE_X86_SSE2_NATIVE)
  #define simde_wasm_i16x8_replace_lane(a, lane, value) _mm_insert_epi16((a), (value), (lane) & 7)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i16x8_replace_lane(a, lane, value) simde_v128_from_neon_i16(vsetq_lane_s16((value), simde_v128_to_neon_i16(a), (lane) & 7))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_replace_lane(a, lane, value) simde_wasm_i16x8_replace_lane((a), (lane), (value))
#endif
/* Portable fallback: return a copy of `a` in which one 32-bit lane has been
 * overwritten with `value`.  The lane index is masked with 3. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_replace_lane (simde_v128_t a, const int lane, int32_t value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 3;

  vec.i32[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i32x4_replace_lane(a, lane, value) wasm_i32x4_replace_lane((a), (lane), (value))
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)
    #define simde_wasm_i32x4_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi32((a), (value), (lane) & 3))
  #else
    #define simde_wasm_i32x4_replace_lane(a, lane, value) _mm_insert_epi32((a), (value), (lane) & 3)
  #endif
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i32x4_replace_lane(a, lane, value) simde_v128_from_neon_i32(vsetq_lane_s32((value), simde_v128_to_neon_i32(a), (lane) & 3))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_replace_lane(a, lane, value) simde_wasm_i32x4_replace_lane((a), (lane), (value))
#endif
/* Portable fallback: return a copy of `a` in which one 64-bit lane has been
 * overwritten with `value`.  The lane index is masked with 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_replace_lane (simde_v128_t a, const int lane, int64_t value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 1;

  vec.i64[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_i64x2_replace_lane(a, lane, value) wasm_i64x2_replace_lane((a), (lane), (value))
#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #define simde_wasm_i64x2_replace_lane(a, lane, value) _mm_insert_epi64((a), (value), (lane) & 1)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_i64x2_replace_lane(a, lane, value) simde_v128_from_neon_i64(vsetq_lane_s64((value), simde_v128_to_neon_i64(a), (lane) & 1))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_replace_lane(a, lane, value) simde_wasm_i64x2_replace_lane((a), (lane), (value))
#endif
/* Portable fallback: return a copy of `a` in which one 32-bit float lane has
 * been overwritten with `value`.  The lane index is masked with 3. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_replace_lane (simde_v128_t a, const int lane, simde_float32 value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 3;

  vec.f32[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f32x4_replace_lane(a, lane, value) wasm_f32x4_replace_lane((a), (lane), (value))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_f32x4_replace_lane(a, lane, value) simde_v128_from_neon_f32(vsetq_lane_f32((value), simde_v128_to_neon_f32(a), (lane) & 3))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_replace_lane(a, lane, value) simde_wasm_f32x4_replace_lane((a), (lane), (value))
#endif
/* Portable fallback: return a copy of `a` in which one 64-bit float lane has
 * been overwritten with `value`.  The lane index is masked with 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_replace_lane (simde_v128_t a, const int lane, simde_float64 value) {
  simde_v128_private vec = simde_v128_to_private(a);
  const int idx = lane & 1;

  vec.f64[idx] = value;

  return simde_v128_from_private(vec);
}
/* Where a native intrinsic is available, a same-named macro shadows the
 * function defined above. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_f64x2_replace_lane(a, lane, value) wasm_f64x2_replace_lane((a), (lane), (value))
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES)
  #define simde_wasm_f64x2_replace_lane(a, lane, value) simde_v128_from_neon_f64(vsetq_lane_f64((value), simde_v128_to_neon_f64(a), (lane) & 1))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_replace_lane(a, lane, value) simde_wasm_f64x2_replace_lane((a), (lane), (value))
#endif
/* eq */
/* Lane-wise `a == b` over the 8-bit integer lanes.  In the scalar fallback
 * a lane that compares equal becomes ~0 and any other lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_eq(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmpeq_epi8(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vceqq_s8(lhs.neon_i8, rhs.neon_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i8), lhs.i8 == rhs.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i8) / sizeof(res.i8[0])) ; k++) {
        res.i8[k] = (lhs.i8[k] == rhs.i8[k]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_eq(a, b) simde_wasm_i8x16_eq((a), (b))
#endif
/* Lane-wise `a == b` over the 16-bit integer lanes.  In the scalar fallback
 * a lane that compares equal becomes ~0 and any other lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_eq(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmpeq_epi16(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vceqq_s16(lhs.neon_i16, rhs.neon_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i16), lhs.i16 == rhs.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i16) / sizeof(res.i16[0])) ; k++) {
        res.i16[k] = (lhs.i16[k] == rhs.i16[k]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_eq(a, b) simde_wasm_i16x8_eq((a), (b))
#endif
/* Lane-wise `a == b` over the 32-bit integer lanes.  In the scalar fallback
 * a lane that compares equal becomes ~0 and any other lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_eq(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmpeq_epi32(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vceqq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.i32 == rhs.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i32) / sizeof(res.i32[0])) ; k++) {
        res.i32[k] = (lhs.i32[k] == rhs.i32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_eq(a, b) simde_wasm_i32x4_eq((a), (b))
#endif
/* Lane-wise `a == b` over the 64-bit integer lanes.  In the scalar fallback
 * a lane that compares equal becomes ~0 and any other lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_eq(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      res.sse_m128i = _mm_cmpeq_epi64(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      res.neon_u64 = vceqq_s64(lhs.neon_i64, rhs.neon_i64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i64), lhs.i64 == rhs.i64);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i64) / sizeof(res.i64[0])) ; k++) {
        res.i64[k] = (lhs.i64[k] == rhs.i64[k]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_eq(a, b) simde_wasm_i64x2_eq((a), (b))
#endif
/* Lane-wise `a == b` over the 32-bit float lanes.  The result is written as
 * integer lanes: in the scalar fallback ~0 where equal, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_eq(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmpeq_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vceqq_f32(lhs.neon_f32, rhs.neon_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 == rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.f32) / sizeof(res.f32[0])) ; k++) {
        res.i32[k] = (lhs.f32[k] == rhs.f32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_eq(a, b) simde_wasm_f32x4_eq((a), (b))
#endif
/* Lane-wise `a == b` over the 64-bit float lanes.  The result is written as
 * 64-bit integer lanes: in the scalar fallback ~0 where equal, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_eq (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_eq(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cmpeq_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vceqq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      /* Store through the 64-bit view so the lane width matches the
       * comparison result and the scalar loop below. */
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 == b_.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_eq(a, b) simde_wasm_f64x2_eq((a), (b))
#endif
/* ne */
/* Lane-wise `a != b` over the 8-bit integer lanes.  In the scalar fallback
 * a lane that differs becomes ~0 and an equal lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_ne(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vmvnq_u8(vceqq_s8(lhs.neon_i8, rhs.neon_i8));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i8), lhs.i8 != rhs.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i8) / sizeof(res.i8[0])) ; k++) {
        res.i8[k] = (lhs.i8[k] != rhs.i8[k]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_ne(a, b) simde_wasm_i8x16_ne((a), (b))
#endif
/* Lane-wise `a != b` over the 16-bit integer lanes.  In the scalar fallback
 * a lane that differs becomes ~0 and an equal lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_ne(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vmvnq_u16(vceqq_s16(lhs.neon_i16, rhs.neon_i16));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i16), lhs.i16 != rhs.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i16) / sizeof(res.i16[0])) ; k++) {
        res.i16[k] = (lhs.i16[k] != rhs.i16[k]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_ne(a, b) simde_wasm_i16x8_ne((a), (b))
#endif
/* Lane-wise `a != b` over the 32-bit integer lanes.  In the scalar fallback
 * a lane that differs becomes ~0 and an equal lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_ne(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vmvnq_u32(vceqq_s32(lhs.neon_i32, rhs.neon_i32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.i32 != rhs.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i32) / sizeof(res.i32[0])) ; k++) {
        res.i32[k] = (lhs.i32[k] != rhs.i32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_ne(a, b) simde_wasm_i32x4_ne((a), (b))
#endif
/* Lane-wise `a != b` over the 64-bit integer lanes.  In the scalar fallback
 * a lane that differs becomes ~0 and an equal lane becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_ne(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* The equality result is inverted through its 32-bit view. */
      res.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(lhs.neon_i64, rhs.neon_i64)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i64), lhs.i64 != rhs.i64);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i64) / sizeof(res.i64[0])) ; k++) {
        res.i64[k] = (lhs.i64[k] != rhs.i64[k]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_ne(a, b) simde_wasm_i64x2_ne((a), (b))
#endif
/* Lane-wise `a != b` over the 32-bit float lanes.  The result is written as
 * integer lanes: in the scalar fallback ~0 where the lanes differ, else 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_ne(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmpneq_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vmvnq_u32(vceqq_f32(lhs.neon_f32, rhs.neon_f32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 != rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.f32) / sizeof(res.f32[0])) ; k++) {
        res.i32[k] = (lhs.f32[k] != rhs.f32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_ne(a, b) simde_wasm_f32x4_ne((a), (b))
#endif
/* Lane-wise `a != b` over the 64-bit float lanes.  The result is written as
 * 64-bit integer lanes: in the scalar fallback ~0 where the lanes differ,
 * otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_ne (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_ne(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cmpneq_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* The equality result is inverted through its 32-bit view. */
      r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      /* Store through the 64-bit view so the lane width matches the
       * comparison result and the scalar loop below. */
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 != b_.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_ne(a, b) simde_wasm_f64x2_ne((a), (b))
#endif
/* lt */
/* Lane-wise `a < b` over the signed 8-bit lanes.  In the scalar fallback a
 * lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmplt_epi8(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcltq_s8(lhs.neon_i8, rhs.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(lhs.altivec_i8, rhs.altivec_i8));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i8), lhs.i8 < rhs.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i8) / sizeof(res.i8[0])) ; k++) {
        res.i8[k] = (lhs.i8[k] < rhs.i8[k]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_lt(a, b) simde_wasm_i8x16_lt((a), (b))
#endif
/* Lane-wise `a < b` over the signed 16-bit lanes.  In the scalar fallback a
 * lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmplt_epi16(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcltq_s16(lhs.neon_i16, rhs.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(lhs.altivec_i16, rhs.altivec_i16));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i16), lhs.i16 < rhs.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i16) / sizeof(res.i16[0])) ; k++) {
        res.i16[k] = (lhs.i16[k] < rhs.i16[k]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_lt(a, b) simde_wasm_i16x8_lt((a), (b))
#endif
/* Lane-wise `a < b` over the signed 32-bit lanes.  In the scalar fallback a
 * lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_cmplt_epi32(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcltq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(lhs.altivec_i32, rhs.altivec_i32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.i32 < rhs.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.i32) / sizeof(res.i32[0])) ; k++) {
        res.i32[k] = (lhs.i32[k] < rhs.i32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_lt(a, b) simde_wasm_i32x4_lt((a), (b))
#endif
/* Lane-wise `a < b` over the signed 64-bit lanes.  In the scalar fallback a
 * lane for which the comparison holds becomes ~0, otherwise 0.
 *
 * Targets without a native 64-bit compare (ARMv7 NEON, SSE2, AltiVec P8)
 * share one emulation, evaluated on the 32-bit halves of each lane:
 *
 *   (halves equal ? 64-bit (a - b) : 0) | (b > a as signed 32-bit)
 *
 * In the high half of each 64-bit lane this is all-ones exactly when a < b:
 * if the high words differ the signed 32-bit compare decides; if they are
 * equal, the high word of the 64-bit difference is all-ones when the
 * subtraction of the low words borrowed.  The high half is then copied
 * over the low half.  The subtraction must be a 64-bit one so that the
 * borrow reaches the high word. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_lt(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* 64-bit subtract (not vsubq_s32): a per-32-bit subtract would leave
       * the high word at zero whenever the high words are equal, making
       * e.g. 0 < 1 compare false. */
      int32x4_t tmp = vorrq_s32(
        vandq_s32(
          vreinterpretq_s32_u32(vceqq_s32(b_.neon_i32, a_.neon_i32)),
          vreinterpretq_s32_s64(vsubq_s64(a_.neon_i64, b_.neon_i64))
        ),
        vreinterpretq_s32_u32(vcgtq_s32(b_.neon_i32, a_.neon_i32))
      );
      /* val[1] of the transpose holds the odd 32-bit elements duplicated. */
      int32x4x2_t trn = vtrnq_s32(tmp, tmp);
      r_.neon_i32 = trn.val[1];
    #elif defined(SIMDE_X86_SSE4_2_NATIVE)
      r_.sse_m128i = _mm_cmpgt_epi64(b_.sse_m128i, a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* https://stackoverflow.com/a/65175746 */
      r_.sse_m128i =
        _mm_shuffle_epi32(
          _mm_or_si128(
            _mm_and_si128(
              _mm_cmpeq_epi32(b_.sse_m128i, a_.sse_m128i),
              _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i)
            ),
            _mm_cmpgt_epi32(
              b_.sse_m128i,
              a_.sse_m128i
            )
          ),
          _MM_SHUFFLE(3, 3, 1, 1)
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(signed int) tmp =
        vec_or(
          vec_and(
            HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(b_.altivec_i32, a_.altivec_i32)),
            HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_sub(
              HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.altivec_i32),
              HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b_.altivec_i32)
            ))
          ),
          vec_cmpgt(b_.altivec_i32, a_.altivec_i32)
        );
      r_.altivec_i32 = vec_mergeo(tmp, tmp);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_lt(a, b) simde_wasm_i64x2_lt((a), (b))
#endif
/* Lane-wise `a < b` over the unsigned 8-bit lanes.  In the scalar fallback a
 * lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcltq_u8(lhs.neon_u8, rhs.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(lhs.altivec_u8, rhs.altivec_u8));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Saturating b - a is non-zero only where b > a; adding its negation
       * with unsigned saturation then turns every non-zero lane into 0xFF. */
      __m128i diff = _mm_subs_epu8(rhs.sse_m128i, lhs.sse_m128i);
      res.sse_m128i = _mm_adds_epu8(diff, _mm_sub_epi8(_mm_setzero_si128(), diff));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u8), lhs.u8 < rhs.u8);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.u8) / sizeof(res.u8[0])) ; k++) {
        res.u8[k] = (lhs.u8[k] < rhs.u8[k]) ? ~UINT8_C(0) : UINT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_lt(a, b) simde_wasm_u8x16_lt((a), (b))
#endif
/* Lane-wise `a < b` over the unsigned 16-bit lanes.  In the scalar fallback
 * a lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcltq_u16(lhs.neon_u16, rhs.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(lhs.altivec_u16, rhs.altivec_u16));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Saturating b - a is non-zero only where b > a; adding its negation
       * with unsigned saturation then turns every non-zero lane into 0xFFFF. */
      __m128i diff = _mm_subs_epu16(rhs.sse_m128i, lhs.sse_m128i);
      res.sse_m128i = _mm_adds_epu16(diff, _mm_sub_epi16(_mm_setzero_si128(), diff));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u16), lhs.u16 < rhs.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.u16) / sizeof(res.u16[0])) ; k++) {
        res.u16[k] = (lhs.u16[k] < rhs.u16[k]) ? ~UINT16_C(0) : UINT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_lt(a, b) simde_wasm_u16x8_lt((a), (b))
#endif
/* Lane-wise `a < b` over the unsigned 32-bit lanes.  In the scalar fallback
 * a lane for which the comparison holds becomes ~0, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcltq_u32(lhs.neon_u32, rhs.neon_u32);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Signed compare, flipped in every lane where the top bits of the
       * two operands differ (the arithmetic shift broadcasts that bit). */
      res.sse_m128i =
        _mm_xor_si128(
          _mm_cmpgt_epi32(rhs.sse_m128i, lhs.sse_m128i),
          _mm_srai_epi32(_mm_xor_si128(rhs.sse_m128i, lhs.sse_m128i), 31)
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(lhs.altivec_u32, rhs.altivec_u32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u32), lhs.u32 < rhs.u32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.u32) / sizeof(res.u32[0])) ; k++) {
        res.u32[k] = (lhs.u32[k] < rhs.u32[k]) ? ~UINT32_C(0) : UINT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_lt(a, b) simde_wasm_u32x4_lt((a), (b))
#endif
/* Lane-wise `a < b` over the 32-bit float lanes.  The result is written as
 * integer lanes: in the scalar fallback ~0 where the comparison holds,
 * otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_lt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmplt_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcltq_f32(lhs.neon_f32, rhs.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(lhs.altivec_f32, rhs.altivec_f32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 < rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.f32) / sizeof(res.f32[0])) ; k++) {
        res.i32[k] = (lhs.f32[k] < rhs.f32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_lt(a, b) simde_wasm_f32x4_lt((a), (b))
#endif
/* Lane-wise `a < b` over the 64-bit float lanes.  The result is written as
 * 64-bit integer lanes: in the scalar fallback ~0 where the comparison
 * holds, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_lt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_lt(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cmplt_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      /* Store through the 64-bit view so the lane width matches the
       * comparison result and the scalar loop below. */
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 < b_.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_lt(a, b) simde_wasm_f64x2_lt((a), (b))
#endif
/* gt */
/* i8x16 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_i8x16_lt(b, a);
  #else
    return wasm_i8x16_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_gt(a, b) simde_wasm_i8x16_gt((a), (b))
#endif
/* i16x8 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_i16x8_lt(b, a);
  #else
    return wasm_i16x8_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_gt(a, b) simde_wasm_i16x8_gt((a), (b))
#endif
/* i32x4 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_i32x4_lt(b, a);
  #else
    return wasm_i32x4_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_gt(a, b) simde_wasm_i32x4_gt((a), (b))
#endif
/* i64x2 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_i64x2_lt(b, a);
  #else
    return wasm_i64x2_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_gt(a, b) simde_wasm_i64x2_gt((a), (b))
#endif
/* u8x16 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_u8x16_lt(b, a);
  #else
    return wasm_u8x16_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_gt(a, b) simde_wasm_u8x16_gt((a), (b))
#endif
/* u16x8 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_u16x8_lt(b, a);
  #else
    return wasm_u16x8_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_gt(a, b) simde_wasm_u16x8_gt((a), (b))
#endif
/* u32x4 `a > b`.  Without the native intrinsic this is evaluated as the
 * matching less-than with the operands swapped. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_gt (simde_v128_t a, simde_v128_t b) {
  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
    return simde_wasm_u32x4_lt(b, a);
  #else
    return wasm_u32x4_gt(a, b);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_gt(a, b) simde_wasm_u32x4_gt((a), (b))
#endif
/* Lane-wise `a > b` over the 32-bit float lanes.  The result is written as
 * integer lanes: in the scalar fallback ~0 where the comparison holds,
 * otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_gt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_gt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmpgt_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcgtq_f32(lhs.neon_f32, rhs.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(lhs.altivec_f32, rhs.altivec_f32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 > rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.f32) / sizeof(res.f32[0])) ; k++) {
        res.i32[k] = (lhs.f32[k] > rhs.f32[k]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_gt(a, b) simde_wasm_f32x4_gt((a), (b))
#endif
/* Lane-wise `a > b` over the 64-bit float lanes.  The result is written as
 * 64-bit integer lanes: in the scalar fallback ~0 where the comparison
 * holds, otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_gt (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_gt(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128d = _mm_cmpgt_pd(lhs.sse_m128d, rhs.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      res.neon_u64 = vcgtq_f64(lhs.neon_f64, rhs.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      res.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(lhs.altivec_f64, rhs.altivec_f64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i64), lhs.f64 > rhs.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t k = 0 ; k < (sizeof(res.f64) / sizeof(res.f64[0])) ; k++) {
        res.i64[k] = (lhs.f64[k] > rhs.f64[k]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_gt(a, b) simde_wasm_f64x2_gt((a), (b))
#endif
/* le */
/* Lane-wise signed 8-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a <= b exactly when min(a, b) == a */
      res.sse_m128i = _mm_cmpeq_epi8(lhs.sse_m128i, _mm_min_epi8(lhs.sse_m128i, rhs.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcleq_s8(lhs.neon_i8, rhs.neon_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i8), lhs.i8 <= rhs.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i8) / sizeof(res.i8[0])) ; lane++) {
        res.i8[lane] = (lhs.i8[lane] <= rhs.i8[lane]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_le(a, b) simde_wasm_i8x16_le((a), (b))
#endif
/* Lane-wise signed 16-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a <= b exactly when min(a, b) == a */
      res.sse_m128i = _mm_cmpeq_epi16(lhs.sse_m128i, _mm_min_epi16(lhs.sse_m128i, rhs.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcleq_s16(lhs.neon_i16, rhs.neon_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i16), lhs.i16 <= rhs.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i16) / sizeof(res.i16[0])) ; lane++) {
        res.i16[lane] = (lhs.i16[lane] <= rhs.i16[lane]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_le(a, b) simde_wasm_i16x8_le((a), (b))
#endif
/* Lane-wise signed 32-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a <= b exactly when min(a, b) == a */
      res.sse_m128i = _mm_cmpeq_epi32(lhs.sse_m128i, _mm_min_epi32(lhs.sse_m128i, rhs.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcleq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.i32 <= rhs.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32) / sizeof(res.i32[0])) ; lane++) {
        res.i32[lane] = (lhs.i32[lane] <= rhs.i32[lane]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_le(a, b) simde_wasm_i32x4_le((a), (b))
#endif
/* Lane-wise signed 64-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* a <= b exactly when min(a, b) == a */
      res.sse_m128i = _mm_cmpeq_epi64(lhs.sse_m128i, _mm_min_epi64(lhs.sse_m128i, rhs.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      res.neon_u64 = vcleq_s64(lhs.neon_i64, rhs.neon_i64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i64), lhs.i64 <= rhs.i64);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i64) / sizeof(res.i64[0])) ; lane++) {
        res.i64[lane] = (lhs.i64[lane] <= rhs.i64[lane]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_le(a, b) simde_wasm_i64x2_le((a), (b))
#endif
/* Lane-wise unsigned 8-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcleq_u8(lhs.neon_u8, rhs.neon_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u8), lhs.u8 <= rhs.u8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u8) / sizeof(res.u8[0])) ; lane++) {
        res.u8[lane] = (lhs.u8[lane] <= rhs.u8[lane]) ? ~UINT8_C(0) : UINT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_le(a, b) simde_wasm_u8x16_le((a), (b))
#endif
/* Lane-wise unsigned 16-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcleq_u16(lhs.neon_u16, rhs.neon_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u16), lhs.u16 <= rhs.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u16) / sizeof(res.u16[0])) ; lane++) {
        res.u16[lane] = (lhs.u16[lane] <= rhs.u16[lane]) ? ~UINT16_C(0) : UINT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_le(a, b) simde_wasm_u16x8_le((a), (b))
#endif
/* Lane-wise unsigned 32-bit "less than or equal".  Each result lane is all
 * ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcleq_u32(lhs.neon_u32, rhs.neon_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u32), lhs.u32 <= rhs.u32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u32) / sizeof(res.u32[0])) ; lane++) {
        res.u32[lane] = (lhs.u32[lane] <= rhs.u32[lane]) ? ~UINT32_C(0) : UINT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_le(a, b) simde_wasm_u32x4_le((a), (b))
#endif
/* Lane-wise single-precision "less than or equal".  Each 32-bit result
 * lane is all ones where a <= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_le(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmple_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcleq_f32(lhs.neon_f32, rhs.neon_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 <= rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.f32) / sizeof(res.f32[0])) ; lane++) {
        res.i32[lane] = (lhs.f32[lane] <= rhs.f32[lane]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_le(a, b) simde_wasm_f32x4_le((a), (b))
#endif
/* Lane-wise double-precision "less than or equal".
 *
 * a, b: vectors interpreted as 64-bit floating-point lanes.
 * Returns a vector whose 64-bit lanes are all ones where a <= b holds and
 * all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_le (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_le(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cmple_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      /* The comparison yields 64-bit mask lanes, so store it through the
       * 64-bit view, matching simde_wasm_f64x2_gt and the scalar loop below. */
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 <= b_.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_le(a, b) simde_wasm_f64x2_le((a), (b))
#endif
/* ge */
/* Lane-wise signed 8-bit "greater than or equal".  Each result lane is all
 * ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi8(_mm_min_epi8(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcgeq_s8(lhs.neon_i8, rhs.neon_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i8), lhs.i8 >= rhs.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i8) / sizeof(res.i8[0])) ; lane++) {
        res.i8[lane] = (lhs.i8[lane] >= rhs.i8[lane]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_ge(a, b) simde_wasm_i8x16_ge((a), (b))
#endif
/* Lane-wise signed 16-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi16(_mm_min_epi16(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcgeq_s16(lhs.neon_i16, rhs.neon_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i16), lhs.i16 >= rhs.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i16) / sizeof(res.i16[0])) ; lane++) {
        res.i16[lane] = (lhs.i16[lane] >= rhs.i16[lane]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_ge(a, b) simde_wasm_i16x8_ge((a), (b))
#endif
/* Lane-wise signed 32-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi32(_mm_min_epi32(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcgeq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.i32 >= rhs.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32) / sizeof(res.i32[0])) ; lane++) {
        res.i32[lane] = (lhs.i32[lane] >= rhs.i32[lane]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_ge(a, b) simde_wasm_i32x4_ge((a), (b))
#endif
/* Lane-wise signed 64-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* a >= b exactly when min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi64(_mm_min_epi64(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      res.neon_u64 = vcgeq_s64(lhs.neon_i64, rhs.neon_i64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i64), lhs.i64 >= rhs.i64);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i64) / sizeof(res.i64[0])) ; lane++) {
        res.i64[lane] = (lhs.i64[lane] >= rhs.i64[lane]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_ge(a, b) simde_wasm_i64x2_ge((a), (b))
#endif
/* Lane-wise unsigned 8-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when the unsigned min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi8(_mm_min_epu8(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u8 = vcgeq_u8(lhs.neon_u8, rhs.neon_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u8), lhs.u8 >= rhs.u8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u8) / sizeof(res.u8[0])) ; lane++) {
        res.u8[lane] = (lhs.u8[lane] >= rhs.u8[lane]) ? ~UINT8_C(0) : UINT8_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_ge(a, b) simde_wasm_u8x16_ge((a), (b))
#endif
/* Lane-wise unsigned 16-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when the unsigned min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi16(_mm_min_epu16(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u16 = vcgeq_u16(lhs.neon_u16, rhs.neon_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u16), lhs.u16 >= rhs.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u16) / sizeof(res.u16[0])) ; lane++) {
        res.u16[lane] = (lhs.u16[lane] >= rhs.u16[lane]) ? ~UINT16_C(0) : UINT16_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_ge(a, b) simde_wasm_u16x8_ge((a), (b))
#endif
/* Lane-wise unsigned 32-bit "greater than or equal".  Each result lane is
 * all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b exactly when the unsigned min(a, b) == b */
      res.sse_m128i = _mm_cmpeq_epi32(_mm_min_epu32(lhs.sse_m128i, rhs.sse_m128i), rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcgeq_u32(lhs.neon_u32, rhs.neon_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.u32), lhs.u32 >= rhs.u32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.u32) / sizeof(res.u32[0])) ; lane++) {
        res.u32[lane] = (lhs.u32[lane] >= rhs.u32[lane]) ? ~UINT32_C(0) : UINT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_ge(a, b) simde_wasm_u32x4_ge((a), (b))
#endif
/* Lane-wise single-precision "greater than or equal".  Each 32-bit result
 * lane is all ones where a >= b holds and all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_ge(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128 = _mm_cmpge_ps(lhs.sse_m128, rhs.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_u32 = vcgeq_f32(lhs.neon_f32, rhs.neon_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(res.i32), lhs.f32 >= rhs.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.f32) / sizeof(res.f32[0])) ; lane++) {
        res.i32[lane] = (lhs.f32[lane] >= rhs.f32[lane]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_ge(a, b) simde_wasm_f32x4_ge((a), (b))
#endif
/* Lane-wise double-precision "greater than or equal".
 *
 * a, b: vectors interpreted as 64-bit floating-point lanes.
 * Returns a vector whose 64-bit lanes are all ones where a >= b holds and
 * all zeros where it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_ge (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_ge(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cmpge_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      /* The comparison yields 64-bit mask lanes, so store it through the
       * 64-bit view, matching the scalar loop below. */
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 >= b_.f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_ge(a, b) simde_wasm_f64x2_ge((a), (b))
#endif
/* not */
/* Bitwise complement of the whole 128-bit vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_not (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_not(a);
  #else
    simde_v128_private src = simde_v128_to_private(a);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* XOR against an all-ones vector flips every bit. */
      res.sse_m128i = _mm_xor_si128(src.sse_m128i, _mm_set1_epi32(~INT32_C(0)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = vmvnq_s32(src.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = ~src.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = ~(src.i32f[lane]);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_not(a) simde_wasm_v128_not((a))
#endif
/* and */
/* Bitwise AND of two 128-bit vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_and (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_and(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_and_si128(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = vandq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = lhs.i32f & rhs.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = lhs.i32f[lane] & rhs.i32f[lane];
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_and(a, b) simde_wasm_v128_and((a), (b))
#endif
/* or */
/* Bitwise OR of two 128-bit vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_or (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_or(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_or_si128(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = vorrq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = lhs.i32f | rhs.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = lhs.i32f[lane] | rhs.i32f[lane];
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_or(a, b) simde_wasm_v128_or((a), (b))
#endif
/* xor */
/* Bitwise exclusive OR of two 128-bit vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_xor (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_v128_private lhs = simde_v128_to_private(a);
    simde_v128_private rhs = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i = _mm_xor_si128(lhs.sse_m128i, rhs.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = veorq_s32(lhs.neon_i32, rhs.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = lhs.i32f ^ rhs.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = lhs.i32f[lane] ^ rhs.i32f[lane];
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_xor(a, b) simde_wasm_v128_xor((a), (b))
#endif
/* andnot */
/* Bitwise a AND (NOT b): keeps the bits of a that are clear in b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_andnot (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_andnot(a, b);
  #else
    simde_v128_private kept = simde_v128_to_private(a);
    simde_v128_private cleared = simde_v128_to_private(b);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* _mm_andnot_si128 complements its FIRST operand, hence the swap. */
      res.sse_m128i = _mm_andnot_si128(cleared.sse_m128i, kept.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = vbicq_s32(kept.neon_i32, cleared.neon_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = kept.i32f & ~cleared.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = kept.i32f[lane] & ~cleared.i32f[lane];
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_andnot(a, b) simde_wasm_v128_andnot((a), (b))
#endif
/* bitselect */
/* Bit-level select: every result bit is taken from a where the matching
 * bit of mask is set and from b where it is clear, i.e.
 * (a & mask) | (b & ~mask). */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_bitselect (simde_v128_t a, simde_v128_t b, simde_v128_t mask) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_bitselect(a, b, mask);
  #else
    simde_v128_private if_set = simde_v128_to_private(a);
    simde_v128_private if_clear = simde_v128_to_private(b);
    simde_v128_private sel = simde_v128_to_private(mask);
    simde_v128_private res;

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0xca is "first ? second : third". */
      res.sse_m128i = _mm_ternarylogic_epi32(sel.sse_m128i, if_set.sse_m128i, if_clear.sse_m128i, 0xca);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      res.sse_m128i =
        _mm_or_si128(
          _mm_and_si128   (sel.sse_m128i, if_set.sse_m128i),
          _mm_andnot_si128(sel.sse_m128i, if_clear.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i32 = vbslq_s32(sel.neon_u32, if_set.neon_i32, if_clear.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      res.altivec_i32 = vec_sel(if_clear.altivec_i32, if_set.altivec_i32, sel.altivec_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      res.i32f = (if_set.i32f & sel.i32f) | (if_clear.i32f & ~sel.i32f);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i32f) / sizeof(res.i32f[0])) ; lane++) {
        res.i32f[lane] = (if_set.i32f[lane] & sel.i32f[lane]) | (if_clear.i32f[lane] & ~sel.i32f[lane]);
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_bitselect(a, b, c) simde_wasm_v128_bitselect((a), (b), (c))
#endif
/* bitmask */
/* Gathers the sign of every 8-bit lane into an integer: bit i of the result
 * is set when lane i of a is negative (see the scalar fallback at the end).
 * The other branches are platform-specific ways of computing the same value. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_wasm_i8x16_bitmask (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i8x16_bitmask(a);
#else
simde_v128_private a_ = simde_v128_to_private(a);
uint32_t r = 0;
#if defined(SIMDE_X86_SSE2_NATIVE)
r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(a_.sse_m128i));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
/* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */
/* Per-byte bit weights; the same 1<<0 .. 1<<7 pattern is used for the low
 * and the high eight bytes. */
static const uint8_t md[16] = {
1 << 0, 1 << 1, 1 << 2, 1 << 3,
1 << 4, 1 << 5, 1 << 6, 1 << 7,
1 << 0, 1 << 1, 1 << 2, 1 << 3,
1 << 4, 1 << 5, 1 << 6, 1 << 7,
};
/* Extend sign bit over entire lane */
uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7));
/* Clear all but the bit we're interested in. */
uint8x16_t masked = vandq_u8(vld1q_u8(md), extended);
/* Alternate bytes from low half and high half */
uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked));
/* Viewed as 16-bit lanes, each lane now pairs a low-half byte with the
 * high-half byte that carries the same weight; summing the lanes below
 * produces the final mask. */
uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1]));
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u16(x);
#else
/* No across-vector add here: widen pairwise to two 64-bit sums and add them. */
uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x));
r =
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) +
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1));
#endif
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)
/* Same bit-permute as the branch below, but with the operand and result
 * cast explicitly when SIMDE_BUG_CLANG_50932 is defined.
 * NOTE(review): idx lists bit positions 8 apart (one per byte), presumably
 * each lane's sign bit -- confirm against vec_bperm's bit numbering. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#else
/* Scalar fallback: OR in bit i for every negative lane i. */
SIMDE_VECTORIZE_REDUCTION(|:r)
for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) {
r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i8[i] < 0) << i);
}
#endif
return r;
#endif
}
/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i8x16_bitmask(a) simde_wasm_i8x16_bitmask((a))
#endif
/* Gathers the sign of every 16-bit lane into an integer: bit i of the
 * result is set when lane i of a is negative (see the scalar fallback at
 * the end).  The other branches are platform-specific equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_wasm_i16x8_bitmask (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i16x8_bitmask(a);
#else
simde_v128_private a_ = simde_v128_to_private(a);
uint32_t r = 0;
#if defined(SIMDE_X86_SSE2_NATIVE)
/* Narrow the 16-bit lanes to bytes with signed saturation (which keeps the
 * sign), pad the upper half with zeros, then take the byte sign mask. */
r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(_mm_packs_epi16(a_.sse_m128i, _mm_setzero_si128())));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
/* Per-lane bit weights: lane i contributes 1 << i. */
static const uint16_t md[8] = {
1 << 0, 1 << 1, 1 << 2, 1 << 3,
1 << 4, 1 << 5, 1 << 6, 1 << 7,
};
/* Arithmetic shift by 15 spreads the sign bit over the whole lane. */
uint16x8_t extended = vreinterpretq_u16_s16(vshrq_n_s16(a_.neon_i16, 15));
/* Keep only each negative lane's own weight bit. */
uint16x8_t masked = vandq_u16(vld1q_u16(md), extended);
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u16(masked);
#else
/* No across-vector add here: widen pairwise to two 64-bit sums and add them. */
uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(masked));
r =
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) +
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1));
#endif
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)
/* Same bit-permute as the branch below, but with the operand and result
 * cast explicitly when SIMDE_BUG_CLANG_50932 is defined.
 * NOTE(review): the first eight idx entries are bit positions 16 apart
 * (one per 16-bit lane); the 128 entries presumably select a zero bit --
 * confirm against vec_bperm's documentation. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#else
/* Scalar fallback: OR in bit i for every negative lane i. */
SIMDE_VECTORIZE_REDUCTION(|:r)
for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) {
r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i16[i] < 0) << i);
}
#endif
return r;
#endif
}
/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i16x8_bitmask(a) simde_wasm_i16x8_bitmask((a))
#endif
/* Gathers the sign of every 32-bit lane into an integer: bit i of the
 * result is set when lane i of a is negative (see the scalar fallback at
 * the end).  The other branches are platform-specific equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_wasm_i32x4_bitmask (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i32x4_bitmask(a);
#else
simde_v128_private a_ = simde_v128_to_private(a);
uint32_t r = 0;
#if defined(SIMDE_X86_SSE_NATIVE)
/* The float sign-mask instruction reads the top bit of each 32-bit lane,
 * which is also the integer sign bit. */
r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_ps(a_.sse_m128));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
/* Per-lane bit weights: lane i contributes 1 << i. */
static const uint32_t md[4] = {
1 << 0, 1 << 1, 1 << 2, 1 << 3
};
/* Arithmetic shift by 31 spreads the sign bit over the whole lane. */
uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31));
/* Keep only each negative lane's own weight bit. */
uint32x4_t masked = vandq_u32(vld1q_u32(md), extended);
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = HEDLEY_STATIC_CAST(uint32_t, vaddvq_u32(masked));
#else
/* No across-vector add here: widen pairwise to two 64-bit sums and add them. */
uint64x2_t t64 = vpaddlq_u32(masked);
r =
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) +
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1));
#endif
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)
/* Same bit-permute as the branch below, but with the operand and result
 * cast explicitly when SIMDE_BUG_CLANG_50932 is defined.
 * NOTE(review): the first four idx entries are bit positions 32 apart
 * (one per 32-bit lane); the 128 entries presumably select a zero bit --
 * confirm against vec_bperm's documentation. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#else
/* Scalar fallback: OR in bit i for every negative lane i. */
SIMDE_VECTORIZE_REDUCTION(|:r)
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) << i);
}
#endif
return r;
#endif
}
/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i32x4_bitmask(a) simde_wasm_i32x4_bitmask((a))
#endif
/* Gathers the sign of every 64-bit lane into an integer: bit i of the
 * result is set when lane i of a is negative (see the scalar fallback at
 * the end).  The other branches are platform-specific equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_wasm_i64x2_bitmask (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i64x2_bitmask(a);
#else
simde_v128_private a_ = simde_v128_to_private(a);
uint32_t r = 0;
#if defined(SIMDE_X86_SSE2_NATIVE)
/* The double sign-mask instruction reads the top bit of each 64-bit lane,
 * which is also the integer sign bit. */
r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_pd(a_.sse_m128d));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
/* Logical shift by 63 leaves just the sign bit (0 or 1) in each lane. */
uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63);
/* Lane 0 supplies bit 0, lane 1 supplies bit 1. */
r =
HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 0)) +
(HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 1)) << 1);
HEDLEY_DIAGNOSTIC_POP
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)
/* Same bit-permute as the branch below, but with the operand and result
 * cast explicitly when SIMDE_BUG_CLANG_50932 is defined.
 * NOTE(review): the first two idx entries are bit positions 64 apart (one
 * per 64-bit lane); the 128 entries presumably select a zero bit --
 * confirm against vec_bperm's documentation. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);
r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));
#else
/* Scalar fallback: OR in bit i for every negative lane i. */
SIMDE_VECTORIZE_REDUCTION(|:r)
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i64[i] < 0) << i);
}
#endif
return r;
#endif
}
/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i64x2_bitmask(a) simde_wasm_i64x2_bitmask((a))
#endif
/* abs */
/* Lane-wise absolute value of the signed 8-bit lanes of a. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_abs(a);
  #else
    simde_v128_private src = simde_v128_to_private(a);
    simde_v128_private res;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      res.sse_m128i = _mm_abs_epi8(src.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      res.neon_i8 = vabsq_s8(src.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      res.altivec_i8 = vec_abs(src.altivec_i8);
    #elif defined(SIMDE_VECTOR_SCALAR)
      /* Negative lanes pick the negated value, all other lanes keep a. */
      __typeof__(res.i8) is_neg = HEDLEY_REINTERPRET_CAST(__typeof__(is_neg), src.i8 < 0);
      res.i8 = (-src.i8 & is_neg) | (src.i8 & ~is_neg);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(res.i8) / sizeof(res.i8[0])) ; lane++) {
        res.i8[lane] = (src.i8[lane] < INT8_C(0)) ? -src.i8[lane] : src.i8[lane];
      }
    #endif

    return simde_v128_from_private(res);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_abs(a) simde_wasm_i8x16_abs((a))
#endif
/* Lane-wise absolute value of the signed 16-bit lanes of a.
 *
 * a: vector interpreted as signed 16-bit lanes.
 * Returns a vector holding |a| for each lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_abs(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_abs_epi16(a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vabsq_s16(a_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_abs(a_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        /* Compare against a 16-bit zero so the constant matches the lane width. */
        r_.i16[i] = (a_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

/* Optional alias under the native WASM intrinsic name. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_abs(a) simde_wasm_i16x8_abs((a))
#endif
/* Lane-wise absolute value of four signed 32-bit integers.
 *
 * Forwards to wasm_i32x4_abs when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE, NEON or AltiVec intrinsic, a compiler
 * vector-extension select, or a per-lane loop.
 *
 * The two portable paths negate through the unsigned view of the lanes:
 * negating INT32_MIN as a signed value is undefined behaviour in C, while
 * unsigned negation wraps and leaves that bit pattern unchanged. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_abs(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_abs_epi32(a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i32 = vabsq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_abs(a_.altivec_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.i32) z = { 0, };
      /* Per-lane comparison result, reinterpreted as unsigned so the
       * select below is done entirely in wrapping arithmetic. */
      __typeof__(r_.u32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.i32 < z);
      r_.u32 = ((-a_.u32) & m) | (a_.u32 & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = (a_.i32[i] < INT32_C(0)) ? (UINT32_C(0) - a_.u32[i]) : a_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_abs(a) simde_wasm_i32x4_abs((a))
#endif
/* Lane-wise absolute value of two signed 64-bit integers.
 *
 * Forwards to wasm_i64x2_abs when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an AVX-512VL, NEON or AltiVec intrinsic, a compiler
 * vector-extension select, or a per-lane loop.
 *
 * The two portable paths negate through the unsigned view of the lanes:
 * negating INT64_MIN as a signed value is undefined behaviour in C, while
 * unsigned negation wraps and leaves that bit pattern unchanged. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_abs(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      r_.sse_m128i = _mm_abs_epi64(a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i64 = vabsq_s64(a_.neon_i64);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i64 = vec_abs(a_.altivec_i64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.i64) z = { 0, };
      /* Per-lane comparison result, reinterpreted as unsigned so the
       * select below is done entirely in wrapping arithmetic. */
      __typeof__(r_.u64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.i64 < z);
      r_.u64 = ((-a_.u64) & m) | (a_.u64 & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = (a_.i64[i] < INT64_C(0)) ? (UINT64_C(0) - a_.u64[i]) : a_.u64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_abs(a) simde_wasm_i64x2_abs((a))
#endif
/* Lane-wise absolute value of four 32-bit floats.
 *
 * Forwards to wasm_f32x4_abs when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE2 bit mask, a NEON or AltiVec intrinsic, a compiler
 * vector-extension mask, or a per-lane loop.
 *
 * The portable paths clear the sign bit of each lane, exactly like the SSE2
 * branch. A "less than zero" select would leave -0.0 and NaNs with the sign
 * bit set unchanged, because neither compares below zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_abs(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vabsq_f32(a_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = vec_abs(a_.altivec_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Every bit except the IEEE-754 sign bit. */
      const __typeof__(r_.u32) keep = {
        UINT32_C(0x7FFFFFFF), UINT32_C(0x7FFFFFFF),
        UINT32_C(0x7FFFFFFF), UINT32_C(0x7FFFFFFF)
      };
      r_.u32 = a_.u32 & keep;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] & UINT32_C(0x7FFFFFFF);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_abs(a) simde_wasm_f32x4_abs((a))
#endif
/* Lane-wise absolute value of two 64-bit floats.
 *
 * Forwards to wasm_f64x2_abs when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE2 bit mask, a NEON or AltiVec intrinsic, a compiler
 * vector-extension mask, or a per-lane loop.
 *
 * The portable paths clear the sign bit of each lane, exactly like the SSE2
 * branch. A "less than zero" select would leave -0.0 and NaNs with the sign
 * bit set unchanged, because neither compares below zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_abs (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_abs(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vabsq_f64(a_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_abs(a_.altivec_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Every bit except the IEEE-754 sign bit. */
      const __typeof__(r_.u64) keep = {
        UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0x7FFFFFFFFFFFFFFF)
      };
      r_.u64 = a_.u64 & keep;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] & UINT64_C(0x7FFFFFFFFFFFFFFF);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_abs(a) simde_wasm_f64x2_abs((a))
#endif
/* neg */
/* Lane-wise negation of sixteen signed 8-bit integers.
 *
 * Forwards to wasm_i8x16_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2 (0 - a), a NEON or AltiVec intrinsic, compiler vector
 * negation, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_neg(a);
  #else
    simde_v128_private
      src_ = simde_v128_to_private(a),
      dst_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i zero = _mm_setzero_si128();
      dst_.sse_m128i = _mm_sub_epi8(zero, src_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      dst_.neon_i8 = vnegq_s8(src_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
      dst_.altivec_i8 = vec_neg(src_.altivec_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      dst_.i8 = -src_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(dst_.i8) / sizeof(dst_.i8[0])) ; lane++) {
        dst_.i8[lane] = -src_.i8[lane];
      }
    #endif

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_neg(a) simde_wasm_i8x16_neg((a))
#endif
/* Lane-wise negation of eight signed 16-bit integers.
 *
 * Forwards to wasm_i16x8_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2 (0 - a), a NEON or AltiVec intrinsic, compiler vector
 * negation, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_neg(a);
  #else
    simde_v128_private
      src_ = simde_v128_to_private(a),
      dst_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i zero = _mm_setzero_si128();
      dst_.sse_m128i = _mm_sub_epi16(zero, src_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      dst_.neon_i16 = vnegq_s16(src_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      dst_.altivec_i16 = vec_neg(src_.altivec_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      dst_.i16 = -src_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(dst_.i16) / sizeof(dst_.i16[0])) ; lane++) {
        dst_.i16[lane] = -src_.i16[lane];
      }
    #endif

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_neg(a) simde_wasm_i16x8_neg((a))
#endif
/* Lane-wise negation of four signed 32-bit integers.
 *
 * Forwards to wasm_i32x4_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2 (0 - a), a NEON or AltiVec intrinsic, compiler vector
 * negation, or a per-lane loop.
 *
 * The portable paths negate the unsigned view of the lanes: negating
 * INT32_MIN as a signed value is undefined behaviour in C, whereas unsigned
 * negation wraps. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_neg(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vnegq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i32 = vec_neg(a_.altivec_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u32 = -a_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = UINT32_C(0) - a_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_neg(a) simde_wasm_i32x4_neg((a))
#endif
/* Lane-wise negation of two signed 64-bit integers.
 *
 * Forwards to wasm_i64x2_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2 (0 - a), a NEON or AltiVec intrinsic, compiler vector
 * negation, or a per-lane loop.
 *
 * The portable paths negate the unsigned view of the lanes: negating
 * INT64_MIN as a signed value is undefined behaviour in C, whereas unsigned
 * negation wraps. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_neg(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i64 = vnegq_s64(a_.neon_i64);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i64 = vec_neg(a_.altivec_i64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u64 = -a_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = UINT64_C(0) - a_.u64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_neg(a) simde_wasm_i64x2_neg((a))
#endif
/* Lane-wise negation of four 32-bit floats.
 *
 * Forwards to wasm_f32x4_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE2 XOR with the sign bit, a NEON or AltiVec intrinsic,
 * compiler vector negation, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_neg(a);
  #else
    simde_v128_private
      src_ = simde_v128_to_private(a),
      dst_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Bit 31 of every 32-bit lane. */
      const __m128i sign_bits = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31));
      dst_.sse_m128i = _mm_xor_si128(sign_bits, src_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      dst_.neon_f32 = vnegq_f32(src_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      dst_.altivec_f32 = vec_neg(src_.altivec_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      dst_.f32 = -src_.f32;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(dst_.f32) / sizeof(dst_.f32[0])) ; lane++) {
        dst_.f32[lane] = -src_.f32[lane];
      }
    #endif

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_neg(a) simde_wasm_f32x4_neg((a))
#endif
/* Lane-wise negation of two 64-bit floats.
 *
 * Forwards to wasm_f64x2_neg when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE2 XOR with the sign bit, a NEON or AltiVec intrinsic,
 * compiler vector negation, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_neg (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_neg(a);
  #else
    simde_v128_private
      src_ = simde_v128_to_private(a),
      dst_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Bit 63 of every 64-bit lane. */
      const __m128i sign_bits = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63));
      dst_.sse_m128i = _mm_xor_si128(sign_bits, src_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      dst_.neon_f64 = vnegq_f64(src_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      dst_.altivec_f64 = vec_neg(src_.altivec_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      dst_.f64 = -src_.f64;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(dst_.f64) / sizeof(dst_.f64[0])) ; lane++) {
        dst_.f64[lane] = -src_.f64[lane];
      }
    #endif

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_neg(a) simde_wasm_f64x2_neg((a))
#endif
/* any_true */
/* Return non-zero when at least one bit of the 128-bit vector is set.
 *
 * Forwards to wasm_v128_any_true when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses an SSE4.1 test, an SSE2 compare + movemask, a NEON
 * horizontal maximum, an AltiVec predicate, or an OR-reduction over the
 * i32f lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_bool
simde_wasm_v128_any_true (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_any_true(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_bool r = 0;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r = !_mm_test_all_zeros(a_.sse_m128i, _mm_set1_epi32(~INT32_C(0)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* All 16 bytes equal to zero yields a movemask of 0xffff. */
      r = _mm_movemask_epi8(_mm_cmpeq_epi8(a_.sse_m128i, _mm_setzero_si128())) != 0xffff;
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r = !!vmaxvq_u32(a_.neon_u32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* The NEON intrinsics need the NEON-typed member (neon_u32), not the
       * generic u32 lanes. */
      uint32x2_t tmp = vpmax_u32(vget_low_u32(a_.neon_u32), vget_high_u32(a_.neon_u32));
      r = (vget_lane_u32(tmp, 0) | vget_lane_u32(tmp, 1)) != 0;
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r = HEDLEY_STATIC_CAST(simde_bool, vec_any_ne(a_.altivec_i32, vec_splats(0)));
    #else
      int_fast32_t ri = 0;
      SIMDE_VECTORIZE_REDUCTION(|:ri)
      for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
        ri |= (a_.i32f[i]);
      }
      r = !!ri;
    #endif

    return r;
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_any_true(a) simde_wasm_v128_any_true((a))
#endif
/* all_true */
/* Return non-zero when every one of the sixteen 8-bit lanes is non-zero.
 *
 * Forwards to wasm_i8x16_all_true when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Every other branch looks for lanes equal to zero and reports true only
 * when there are none. */
SIMDE_FUNCTION_ATTRIBUTES
simde_bool
simde_wasm_i8x16_all_true (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_all_true(a);
  #else
    simde_v128_private v_ = simde_v128_to_private(a);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi8(v_.sse_m128i, _mm_set1_epi8(INT8_C(0)));
      return _mm_test_all_zeros(zero_lanes, _mm_set1_epi8(~INT8_C(0)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi8(v_.sse_m128i, _mm_setzero_si128());
      return _mm_movemask_epi8(zero_lanes) == 0;
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !vmaxvq_u8(vceqzq_u8(v_.neon_u8));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint8x16_t zero_v = vdupq_n_u8(0);
      /* 0xFF in every byte that is zero in the input. */
      uint8x16_t zero_lanes = vceqq_u8(v_.neon_u8, vdupq_n_u8(0));
      /* A 32-bit group is all-ones when none of its bytes was zero. */
      uint32x4_t group_ok = vceqq_u32(vreinterpretq_u32_u8(zero_lanes), vreinterpretq_u32_u8(zero_v));
      uint32x2_t pair_ok = vpmin_u32(vget_low_u32(group_ok), vget_high_u32(group_ok));
      return !!(
        vget_lane_u32(pair_ok, 0) &
        vget_lane_u32(pair_ok, 1));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(v_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(signed char, 0))));
    #else
      int8_t all_set = !INT8_C(0);
      SIMDE_VECTORIZE_REDUCTION(&:all_set)
      for (size_t lane = 0 ; lane < (sizeof(v_.i8) / sizeof(v_.i8[0])) ; lane++) {
        all_set &= !!(v_.i8[lane]);
      }
      return all_set;
    #endif
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_all_true(a) simde_wasm_i8x16_all_true((a))
#endif
/* Return non-zero when every one of the eight 16-bit lanes is non-zero.
 *
 * Forwards to wasm_i16x8_all_true when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Every other branch looks for lanes equal to zero and reports true only
 * when there are none. */
SIMDE_FUNCTION_ATTRIBUTES
simde_bool
simde_wasm_i16x8_all_true (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_all_true(a);
  #else
    simde_v128_private v_ = simde_v128_to_private(a);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi16(v_.sse_m128i, _mm_setzero_si128());
      return _mm_test_all_zeros(zero_lanes, _mm_set1_epi16(~INT16_C(0)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi16(v_.sse_m128i, _mm_setzero_si128());
      return _mm_movemask_epi8(zero_lanes) == 0;
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !vmaxvq_u16(vceqzq_u16(v_.neon_u16));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint16x8_t zero_v = vdupq_n_u16(0);
      /* 0xFFFF in every lane that is zero in the input. */
      uint16x8_t zero_lanes = vceqq_u16(v_.neon_u16, vdupq_n_u16(0));
      /* A 32-bit group is all-ones when neither of its lanes was zero. */
      uint32x4_t group_ok = vceqq_u32(vreinterpretq_u32_u16(zero_lanes), vreinterpretq_u32_u16(zero_v));
      uint32x2_t pair_ok = vpmin_u32(vget_low_u32(group_ok), vget_high_u32(group_ok));
      return !!(
        vget_lane_u32(pair_ok, 0) &
        vget_lane_u32(pair_ok, 1));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(v_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(signed short, 0))));
    #else
      int16_t all_set = !INT16_C(0);
      SIMDE_VECTORIZE_REDUCTION(&:all_set)
      for (size_t lane = 0 ; lane < (sizeof(v_.i16) / sizeof(v_.i16[0])) ; lane++) {
        all_set &= !!(v_.i16[lane]);
      }
      return all_set;
    #endif
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_all_true(a) simde_wasm_i16x8_all_true((a))
#endif
/* Return non-zero when every one of the four 32-bit lanes is non-zero.
 *
 * Forwards to wasm_i32x4_all_true when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Every other branch looks for lanes equal to zero and reports true only
 * when there are none. */
SIMDE_FUNCTION_ATTRIBUTES
simde_bool
simde_wasm_i32x4_all_true (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_all_true(a);
  #else
    simde_v128_private v_ = simde_v128_to_private(a);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi32(v_.sse_m128i, _mm_setzero_si128());
      return _mm_test_all_zeros(zero_lanes, _mm_set1_epi32(~INT32_C(0)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i zero_lanes = _mm_cmpeq_epi32(v_.sse_m128i, _mm_setzero_si128());
      return _mm_movemask_ps(_mm_castsi128_ps(zero_lanes)) == 0;
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !vmaxvq_u32(vceqzq_u32(v_.neon_u32));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* All-ones in every lane that is non-zero in the input. */
      uint32x4_t lane_ok = vmvnq_u32(vceqq_u32(v_.neon_u32, vdupq_n_u32(0)));
      uint32x2_t pair_ok = vpmin_u32(vget_low_u32(lane_ok), vget_high_u32(lane_ok));
      return !!(
        vget_lane_u32(pair_ok, 0) &
        vget_lane_u32(pair_ok, 1));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(v_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(signed int, 0))));
    #else
      int32_t all_set = !INT32_C(0);
      SIMDE_VECTORIZE_REDUCTION(&:all_set)
      for (size_t lane = 0 ; lane < (sizeof(v_.i32) / sizeof(v_.i32[0])) ; lane++) {
        all_set &= !!(v_.i32[lane]);
      }
      return all_set;
    #endif
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_all_true(a) simde_wasm_i32x4_all_true((a))
#endif
/* Return non-zero when both 64-bit lanes are non-zero.
 *
 * Forwards to wasm_i64x2_all_true only when both SIMDE_WASM_SIMD128_NATIVE
 * and __wasm_unimplemented_simd128__ are defined. Every other branch looks
 * for lanes equal to zero and reports true only when there are none. */
SIMDE_FUNCTION_ATTRIBUTES
simde_bool
simde_wasm_i64x2_all_true (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE) && defined(__wasm_unimplemented_simd128__)
    return wasm_i64x2_all_true(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      return _mm_test_all_zeros(_mm_cmpeq_epi64(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi32(~INT32_C(0)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 has no 64-bit integer compare, so compare the four 32-bit
       * halves and require both halves of a lane to be zero. A
       * floating-point compare must not be used here: it treats the integer
       * 0x8000000000000000 (the bit pattern of -0.0) as equal to zero. */
      const int zero_halves = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a_.sse_m128i, _mm_setzero_si128())));
      return ((zero_halves & 0x3) != 0x3) && ((zero_halves & 0xC) != 0xC);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i64, HEDLEY_REINTERPRET_CAST(__typeof__(a_.altivec_i64), vec_splats(0))));
    #else
      int64_t r = !INT64_C(0);
      SIMDE_VECTORIZE_REDUCTION(&:r)
      for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
        r &= !!(a_.i64[i]);
      }
      return r;
    #endif
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__wasm_unimplemented_simd128__))
  #define wasm_i64x2_all_true(a) simde_wasm_i64x2_all_true((a))
#endif
/* shl */
/* Shift each of the sixteen 8-bit lanes left by `count` bits.
 *
 * Forwards to wasm_i8x16_shl when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 7) before it is used. The NEON branch needs this explicitly,
 * because vshlq_s8 interprets the per-lane count as a signed value and does
 * not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_shl (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_shl(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, count & 7)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_sl(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i8 = a_.i8 << (count & 7);
    #else
      /* Shift the unsigned view: left-shifting a negative signed value is
       * undefined behaviour in C. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] << (count & 7));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_shl(a, count) simde_wasm_i8x16_shl((a), (count))
#endif
/* Shift each of the eight 16-bit lanes left by `count` bits.
 *
 * Forwards to wasm_i16x8_shl when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 15) before it is used. The NEON branch needs this explicitly,
 * because vshlq_s16 interprets the per-lane count as a signed value and
 * does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_shl (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_shl(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sll_epi16(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 15)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count & 15)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_sl(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i16 = a_.i16 << (count & 15);
    #else
      /* Shift the unsigned view: left-shifting a negative signed value is
       * undefined behaviour in C. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << (count & 15));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_shl(a, count) simde_wasm_i16x8_shl((a), (count))
#endif
/* Shift each of the four 32-bit lanes left by `count` bits.
 *
 * Forwards to wasm_i32x4_shl when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 31) before it is used. The NEON branch needs this explicitly,
 * because vshlq_s32 interprets the per-lane count as a signed value and
 * does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_shl (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_shl(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sll_epi32(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 31)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count & 31)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_sl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i32 = a_.i32 << (count & 31);
    #else
      /* Shift the unsigned view: left-shifting a negative signed value is
       * undefined behaviour in C. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] << (count & 31));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_shl(a, count) simde_wasm_i32x4_shl((a), (count))
#endif
/* Shift each of the two 64-bit lanes left by `count` bits.
 *
 * Forwards to wasm_i64x2_shl when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 63) before it is used. The NEON branch needs this explicitly,
 * because vshlq_s64 interprets the per-lane count as a signed value and
 * does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_shl (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_shl(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sll_epi64(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 63)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, count & 63)));
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i64 = vec_sl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i64 = a_.i64 << (count & 63);
    #else
      /* Shift the unsigned view: left-shifting a negative signed value is
       * undefined behaviour in C. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] << (count & 63));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_shl(a, count) simde_wasm_i64x2_shl((a), (count))
#endif
/* shr */
/* Shift each of the sixteen signed 8-bit lanes right by `count` bits.
 *
 * Forwards to wasm_i8x16_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 7) before it is used. The NEON branch expresses the right shift
 * as vshlq_s8 with a negated count, so the count must be masked first:
 * vshlq_s8 does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, -HEDLEY_STATIC_CAST(int8_t, count & 7))));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_sra(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i8 = a_.i8 >> (count & 7);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> (count & 7));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_shr(a, count) simde_wasm_i8x16_shr((a), (count))
#endif
/* Shift each of the eight signed 16-bit lanes right by `count` bits.
 *
 * Forwards to wasm_i16x8_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 15) before it is used. The NEON branch expresses the right shift
 * as vshlq_s16 with a negated count, so the count must be masked first:
 * vshlq_s16 does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sra_epi16(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 15)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -HEDLEY_STATIC_CAST(int16_t, count & 15))));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_sra(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i16 = a_.i16 >> (count & 15);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> (count & 15));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_shr(a, count) simde_wasm_i16x8_shr((a), (count))
#endif
/* Shift each of the four signed 32-bit lanes right by `count` bits.
 *
 * Forwards to wasm_i32x4_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 31) before it is used. The NEON branch expresses the right shift
 * as vshlq_s32 with a negated count, so the count must be masked first:
 * vshlq_s32 does not wrap it.
 *
 * _mm_sra_epi32 is an SSE2 intrinsic, so the x86 branch is enabled for any
 * SSE2-capable target. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sra_epi32(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 31)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_sra(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i32 = a_.i32 >> (count & 31);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> (count & 31));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_shr(a, count) simde_wasm_i32x4_shr((a), (count))
#endif
/* Shift each of the two signed 64-bit lanes right by `count` bits.
 *
 * Forwards to wasm_i64x2_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 63) before it is used. The NEON branch expresses the right shift
 * as vshlq_s64 with a negated count, so the count must be masked first:
 * vshlq_s64 does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      r_.sse_m128i = _mm_sra_epi64(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 63)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63)));
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i64 = vec_sra(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.i64 = a_.i64 >> (count & 63);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> (count & 63));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_shr(a, count) simde_wasm_i64x2_shr((a), (count))
#endif
/* Shift each of the sixteen unsigned 8-bit lanes right by `count` bits.
 *
 * Forwards to wasm_u8x16_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 7) before it is used. The NEON branch expresses the right shift
 * as vshlq_u8 with a negated count, so the count must be masked first:
 * vshlq_u8 does not wrap it. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vshlq_u8(a_.neon_u8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, -HEDLEY_STATIC_CAST(int8_t, count & 7))));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_sr(a_.altivec_u8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.u8 = a_.u8 >> (count & 7);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> (count & 7));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_shr(a, count) simde_wasm_u8x16_shr((a), (count))
#endif
/* Shift each of the eight unsigned 16-bit lanes right by `count` bits.
 *
 * Forwards to wasm_u16x8_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 15) before it is used. The NEON branch expresses the right shift
 * as vshlq_u16 with a negated count, so the count must be masked first:
 * vshlq_u16 does not wrap it.
 *
 * The AltiVec branch uses the logical shift vec_sr on the unsigned lanes;
 * the arithmetic vec_sra would copy the top bit into the vacated bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_srl_epi16(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 15)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -HEDLEY_STATIC_CAST(int16_t, count & 15))));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_sr(a_.altivec_u16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.u16 = a_.u16 >> (count & 15);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> (count & 15));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_shr(a, count) simde_wasm_u16x8_shr((a), (count))
#endif
/* Shift each of the four unsigned 32-bit lanes right by `count` bits.
 *
 * Forwards to wasm_u32x4_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 31) before it is used. The NEON branch expresses the right shift
 * as vshlq_u32 with a negated count, so the count must be masked first:
 * vshlq_u32 does not wrap it.
 *
 * The AltiVec branch uses the logical shift vec_sr on the unsigned lanes;
 * the arithmetic vec_sra would copy the top bit into the vacated bits.
 * _mm_srl_epi32 is an SSE2 intrinsic, so the x86 branch is enabled for any
 * SSE2-capable target. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_srl_epi32(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 31)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u32 = vec_sr(a_.altivec_u32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.u32 = a_.u32 >> (count & 31);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> (count & 31));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_shr(a, count) simde_wasm_u32x4_shr((a), (count))
#endif
/* Shift each of the two unsigned 64-bit lanes right by `count` bits.
 *
 * Forwards to wasm_u64x2_shr when SIMDE_WASM_SIMD128_NATIVE is defined.
 * In every other branch the count is reduced modulo the lane width
 * (count & 63) before it is used. The NEON branch expresses the right shift
 * as vshlq_u64 with a negated count, so the count must be masked first:
 * vshlq_u64 does not wrap it.
 *
 * The AltiVec branch uses the logical shift vec_sr on the unsigned lanes;
 * the arithmetic vec_sra would copy the top bit into the vacated bits.
 * _mm_srl_epi64 is an SSE2 intrinsic, so the x86 branch is enabled for any
 * SSE2-capable target. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_shr (simde_v128_t a, uint32_t count) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_shr(a, count);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_srl_epi64(a_.sse_m128i, _mm_cvtsi32_si128(HEDLEY_STATIC_CAST(int, count & 63)));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63)));
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_u64 = vec_sr(a_.altivec_u64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR)
      r_.u64 = a_.u64 >> (count & 63);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> (count & 63));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_shr(a, count) simde_wasm_u64x2_shr((a), (count))
#endif
/* add */
/* Lane-wise sum of two vectors of sixteen 8-bit integers.
 *
 * Forwards to wasm_i8x16_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2, compiler vector arithmetic, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_add(a, b);
  #else
    simde_v128_private
      lhs_ = simde_v128_to_private(a),
      rhs_ = simde_v128_to_private(b),
      sum_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      sum_.sse_m128i = _mm_add_epi8(lhs_.sse_m128i, rhs_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      sum_.i8 = lhs_.i8 + rhs_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(sum_.i8) / sizeof(sum_.i8[0])) ; lane++) {
        sum_.i8[lane] = lhs_.i8[lane] + rhs_.i8[lane];
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_add(a, b) simde_wasm_i8x16_add((a), (b))
#endif
/* Lane-wise sum of two vectors of eight 16-bit integers.
 *
 * Forwards to wasm_i16x8_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2, compiler vector arithmetic, or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_add(a, b);
  #else
    simde_v128_private
      lhs_ = simde_v128_to_private(a),
      rhs_ = simde_v128_to_private(b),
      sum_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      sum_.sse_m128i = _mm_add_epi16(lhs_.sse_m128i, rhs_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      sum_.i16 = lhs_.i16 + rhs_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(sum_.i16) / sizeof(sum_.i16[0])) ; lane++) {
        sum_.i16[lane] = lhs_.i16[lane] + rhs_.i16[lane];
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_add(a, b) simde_wasm_i16x8_add((a), (b))
#endif
/* Lane-wise wrapping sum of two vectors of four 32-bit integers.
 *
 * Forwards to wasm_i32x4_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2, compiler vector arithmetic, or a per-lane loop.
 *
 * The portable paths add the unsigned view of the lanes: signed overflow is
 * undefined behaviour in C, whereas unsigned addition wraps and yields the
 * same bit pattern. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_add(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_add_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u32 = a_.u32 + b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] + b_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_add(a, b) simde_wasm_i32x4_add((a), (b))
#endif
/* Lane-wise wrapping sum of two vectors of two 64-bit integers.
 *
 * Forwards to wasm_i64x2_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses SSE2, compiler vector arithmetic, or a per-lane loop.
 *
 * The portable paths add the unsigned view of the lanes: signed overflow is
 * undefined behaviour in C, whereas unsigned addition wraps and yields the
 * same bit pattern. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_add(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_add_epi64(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u64 = a_.u64 + b_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] + b_.u64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_add(a, b) simde_wasm_i64x2_add((a), (b))
#endif
/* Lane-wise sum of two vectors of four 32-bit floats.
 *
 * Forwards to wasm_f32x4_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses _mm_add_ps, compiler vector arithmetic, or a per-lane
 * loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_add(a, b);
  #else
    simde_v128_private
      lhs_ = simde_v128_to_private(a),
      rhs_ = simde_v128_to_private(b),
      sum_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      sum_.sse_m128 = _mm_add_ps(lhs_.sse_m128, rhs_.sse_m128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      sum_.f32 = lhs_.f32 + rhs_.f32;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(sum_.f32) / sizeof(sum_.f32[0])) ; lane++) {
        sum_.f32[lane] = lhs_.f32[lane] + rhs_.f32[lane];
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_add(a, b) simde_wasm_f32x4_add((a), (b))
#endif
/* Lane-wise sum of two vectors of two 64-bit floats.
 *
 * Forwards to wasm_f64x2_add when SIMDE_WASM_SIMD128_NATIVE is defined.
 * Otherwise uses _mm_add_pd, compiler vector arithmetic, or a per-lane
 * loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_add (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_add(a, b);
  #else
    simde_v128_private
      lhs_ = simde_v128_to_private(a),
      rhs_ = simde_v128_to_private(b),
      sum_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      sum_.sse_m128d = _mm_add_pd(lhs_.sse_m128d, rhs_.sse_m128d);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      sum_.f64 = lhs_.f64 + rhs_.f64;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(sum_.f64) / sizeof(sum_.f64[0])) ; lane++) {
        sum_.f64[lane] = lhs_.f64[lane] + rhs_.f64[lane];
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_add(a, b) simde_wasm_f64x2_add((a), (b))
#endif
/* sub */
/* Lane-wise difference (a - b) of the 8-bit integer lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_sub(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.i8 = a_.i8 - b_.i8;
    #else
      const size_t lane_count = sizeof(r_.i8) / sizeof(r_.i8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        /* The subtraction happens in int; the cast narrows it back. */
        r_.i8[lane] = HEDLEY_STATIC_CAST(int8_t, a_.i8[lane] - b_.i8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_sub(a, b) simde_wasm_i8x16_sub((a), (b))
#endif
/* Lane-wise difference (a - b) of the 16-bit integer lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_sub(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.i16 = a_.i16 - b_.i16;
    #else
      const size_t lane_count = sizeof(r_.i16) / sizeof(r_.i16[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        /* The subtraction happens in int; the cast narrows it back. */
        r_.i16[lane] = HEDLEY_STATIC_CAST(int16_t, a_.i16[lane] - b_.i16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_sub(a, b) simde_wasm_i16x8_sub((a), (b))
#endif
/* Subtracts the 32-bit integer lanes of b from those of a.
 *
 * The portable paths work on the unsigned views of the lanes so that an
 * out-of-range difference wraps around instead of being a signed
 * overflow (which is undefined in C).  The resulting bits are the same
 * as those of a two's complement signed subtraction. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_sub(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u32 = a_.u32 - b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] - b_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_sub(a, b) simde_wasm_i32x4_sub((a), (b))
#endif
/* Subtracts the 64-bit integer lanes of b from those of a.
 *
 * The portable paths work on the unsigned views of the lanes so that an
 * out-of-range difference wraps around instead of being a signed
 * overflow (which is undefined in C).  The resulting bits are the same
 * as those of a two's complement signed subtraction. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_sub(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u64 = a_.u64 - b_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] - b_.u64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_sub(a, b) simde_wasm_i64x2_sub((a), (b))
#endif
/* Lane-wise difference (a - b) of the 32-bit float lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_sub(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128 = _mm_sub_ps(a_.sse_m128, b_.sse_m128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f32 = a_.f32 - b_.f32;
    #else
      const size_t lane_count = sizeof(r_.f32) / sizeof(r_.f32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f32[lane] = a_.f32[lane] - b_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_sub(a, b) simde_wasm_f32x4_sub((a), (b))
#endif
/* Lane-wise difference (a - b) of the 64-bit float lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_sub (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_sub(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_sub_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f64 = a_.f64 - b_.f64;
    #else
      const size_t lane_count = sizeof(r_.f64) / sizeof(r_.f64[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f64[lane] = a_.f64[lane] - b_.f64[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_sub(a, b) simde_wasm_f64x2_sub((a), (b))
#endif
/* mul */
/* Lane-wise product of the 16-bit integer lanes of a and b, keeping the
 * low 16 bits of each product. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_mul (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_mul(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* NOTE(review): vec_pack narrows the even-lane products followed by
       * the odd-lane products; confirm that this leaves the results in
       * the original lane order. */
      r_.altivec_i16 =
        vec_pack(
          vec_mule(a_.altivec_i16, b_.altivec_i16),
          vec_mulo(a_.altivec_i16, b_.altivec_i16)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.i16 = a_.i16 * b_.i16;
    #else
      const size_t lane_count = sizeof(r_.i16) / sizeof(r_.i16[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        /* The multiplication happens in int; the cast narrows it back. */
        r_.i16[lane] = HEDLEY_STATIC_CAST(int16_t, a_.i16[lane] * b_.i16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_mul(a, b) simde_wasm_i16x8_mul((a), (b))
#endif
/* Multiplies the 32-bit integer lanes of a and b, keeping the low 32 bits
 * of each product.
 *
 * The portable paths work on the unsigned views of the lanes so that an
 * out-of-range product wraps around instead of being a signed overflow
 * (which is undefined in C).  The low 32 bits of the product are the
 * same for the signed and unsigned interpretation. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_mul (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_mul(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_mullo_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u32 = a_.u32 * b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] * b_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_mul(a, b) simde_wasm_i32x4_mul((a), (b))
#endif
/* Multiplies the 64-bit integer lanes of a and b, keeping the low 64 bits
 * of each product.
 *
 * The portable paths work on the unsigned views of the lanes so that an
 * out-of-range product wraps around instead of being a signed overflow
 * (which is undefined in C).  The low 64 bits of the product are the
 * same for the signed and unsigned interpretation. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_mul (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_mul(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE)
      r_.sse_m128i = _mm_mullo_epi64(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.u64 = a_.u64 * b_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] * b_.u64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_mul(a, b) simde_wasm_i64x2_mul((a), (b))
#endif
/* Lane-wise product of the 32-bit float lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_mul (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_mul(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128 = _mm_mul_ps(a_.sse_m128, b_.sse_m128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f32 = a_.f32 * b_.f32;
    #else
      const size_t lane_count = sizeof(r_.f32) / sizeof(r_.f32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f32[lane] = a_.f32[lane] * b_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_mul(a, b) simde_wasm_f32x4_mul((a), (b))
#endif
/* Lane-wise product of the 64-bit float lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_mul (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_mul(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_mul_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f64 = a_.f64 * b_.f64;
    #else
      const size_t lane_count = sizeof(r_.f64) / sizeof(r_.f64[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f64[lane] = a_.f64[lane] * b_.f64[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_mul(a, b) simde_wasm_f64x2_mul((a), (b))
#endif
/* q15mulr_sat */
/* Saturating, rounding Q15 multiplication of the 16-bit integer lanes:
 * each result lane is (a * b + 0x4000) >> 15, clamped to
 * [INT16_MIN, INT16_MAX].
 *
 * The only input pair whose unclamped result does not fit in 16 bits is
 * INT16_MIN * INT16_MIN, which gives 0x8000 (one past INT16_MAX).  The
 * x86 paths compute the wrapped value first and then flip every lane
 * equal to 0x8000 (INT16_MIN) to 0x7FFF (INT16_MAX); no non-overflowing
 * input can produce 0x8000, so that comparison identifies exactly the
 * saturating lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_q15mulr_sat (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_q15mulr_sat(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    /* https://github.com/WebAssembly/simd/pull/365 */
    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vqrdmulhq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_X86_SSSE3_NATIVE)
      __m128i y = _mm_mulhrs_epi16(a_.sse_m128i, b_.sse_m128i);
      /* All-ones in the lanes that wrapped to 0x8000; XOR turns them into 0x7FFF. */
      __m128i tmp = _mm_cmpeq_epi16(y, _mm_set1_epi16(INT16_MIN));
      r_.sse_m128i = _mm_xor_si128(y, tmp);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i prod_lo = _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i);
      const __m128i prod_hi = _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i);
      /* (prod_hi << 1) plus the rounded contribution of the top two bits
       * of prod_lo, i.e. the low 16 bits of (product + 0x4000) >> 15. */
      const __m128i tmp =
        _mm_add_epi16(
          _mm_avg_epu16(
            _mm_srli_epi16(prod_lo, 14),
            _mm_setzero_si128()
          ),
          _mm_add_epi16(prod_hi, prod_hi)
        );
      /* Flip lanes that wrapped to 0x8000 into 0x7FFF. */
      r_.sse_m128i =
        _mm_xor_si128(
          tmp,
          _mm_cmpeq_epi16(_mm_set1_epi16(INT16_MIN), tmp)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);
        /* Signed rounding constant so the sum is never converted to unsigned. */
        tmp += INT32_C(0x4000);
        tmp >>= 15;
        r_.i16[i] = (tmp < INT16_MIN) ? INT16_MIN : ((tmp > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, tmp));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_q15mulr_sat(a, b) simde_wasm_i16x8_q15mulr_sat((a), (b))
#endif
/* min */
/* Lane-wise minimum of the signed 8-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_min_epi8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Select a where a < b, b elsewhere. */
      __m128i a_is_less = _mm_cmplt_epi8(a_.sse_m128i, b_.sse_m128i);
      __m128i from_a = _mm_and_si128(a_is_less, a_.sse_m128i);
      __m128i from_b = _mm_andnot_si128(a_is_less, b_.sse_m128i);
      r_.sse_m128i = _mm_or_si128(from_a, from_b);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8);
    #else
      const size_t lane_count = sizeof(r_.i8) / sizeof(r_.i8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i8[lane] = (a_.i8[lane] < b_.i8[lane]) ? a_.i8[lane] : b_.i8[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_min(a, b) simde_wasm_i8x16_min((a), (b))
#endif
/* Lane-wise minimum of the signed 16-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_min_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16);
    #else
      const size_t lane_count = sizeof(r_.i16) / sizeof(r_.i16[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i16[lane] = (a_.i16[lane] < b_.i16[lane]) ? a_.i16[lane] : b_.i16[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_min(a, b) simde_wasm_i16x8_min((a), (b))
#endif
/* Lane-wise minimum of the signed 32-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_min_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Select a where a < b, b elsewhere. */
      __m128i a_is_less = _mm_cmplt_epi32(a_.sse_m128i, b_.sse_m128i);
      __m128i from_a = _mm_and_si128(a_is_less, a_.sse_m128i);
      __m128i from_b = _mm_andnot_si128(a_is_less, b_.sse_m128i);
      r_.sse_m128i = _mm_or_si128(from_a, from_b);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32);
    #else
      const size_t lane_count = sizeof(r_.i32) / sizeof(r_.i32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i32[lane] = (a_.i32[lane] < b_.i32[lane]) ? a_.i32[lane] : b_.i32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_min(a, b) simde_wasm_i32x4_min((a), (b))
#endif
/* Lane-wise minimum of the unsigned 8-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_min_epu8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8);
    #else
      const size_t lane_count = sizeof(r_.u8) / sizeof(r_.u8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.u8[lane] = (a_.u8[lane] < b_.u8[lane]) ? a_.u8[lane] : b_.u8[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_min(a, b) simde_wasm_u8x16_min((a), (b))
#endif
/* Lane-wise minimum of the unsigned 16-bit integer lanes of a and b.
 *
 * SSE2 has no unsigned 16-bit minimum, so that path computes
 * a - saturating_sub(a, b): the saturating difference is (a - b) where
 * a > b and 0 elsewhere, which leaves b or a respectively. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_min(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_min_epu16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
      /* Use the private view of a, like every other branch, instead of
       * passing the public simde_v128_t to the intrinsic. */
      r_.sse_m128i = _mm_sub_epi16(a_.sse_m128i, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_min(a, b) simde_wasm_u16x8_min((a), (b))
#endif
/* Lane-wise minimum of the unsigned 32-bit integer lanes of a and b.
 *
 * SSE2 has neither an unsigned 32-bit minimum nor an unsigned compare.
 * That path emulates a saturating unsigned subtraction a - b (XOR-ing
 * both compare operands with INT32_MIN turns the signed compare into an
 * unsigned one), and selects a wherever the saturated difference is
 * zero, i.e. wherever a <= b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_min(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_min_epu32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i i32_min = _mm_set1_epi32(INT32_MIN);
      const __m128i difference = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i);
      /* m is all-ones in the lanes where a <= b (unsigned). */
      __m128i m =
        _mm_cmpeq_epi32(
          /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */
          _mm_and_si128(
            difference,
            /* NOT (difference > a, unsigned): clears lanes that wrapped. */
            _mm_xor_si128(
              _mm_cmpgt_epi32(
                _mm_xor_si128(difference, i32_min),
                _mm_xor_si128(a_.sse_m128i, i32_min)
              ),
              _mm_set1_epi32(~INT32_C(0))
            )
          ),
          _mm_setzero_si128()
        );
      r_.sse_m128i =
        _mm_or_si128(
          _mm_and_si128(m, a_.sse_m128i),
          _mm_andnot_si128(m, b_.sse_m128i)
        );
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32);
    #else
      /* Bound the loop by the member that is actually indexed. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_min(a, b) simde_wasm_u32x4_min((a), (b))
#endif
/* Lane-wise minimum of the 32-bit float lanes of a and b.
 *
 * The x86 paths produce SIMDE_MATH_NANF in every lane where a and b
 * compare unordered and _mm_min_ps(a, b) elsewhere.  The NEON path
 * defers to vminq_f32.  The AltiVec and portable paths pick a when a is
 * NaN or a < b, and b otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128 =
        _mm_blendv_ps(
          _mm_set1_ps(SIMDE_MATH_NANF),
          _mm_min_ps(a_.sse_m128, b_.sse_m128),
          _mm_cmpord_ps(a_.sse_m128, b_.sse_m128)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* All-ones where neither a nor b is NaN. */
      __m128 ordered = _mm_cmpord_ps(a_.sse_m128, b_.sse_m128);
      __m128 min_where_ordered = _mm_and_ps(ordered, _mm_min_ps(a_.sse_m128, b_.sse_m128));
      __m128 nan_elsewhere = _mm_andnot_ps(ordered, _mm_set1_ps(SIMDE_MATH_NANF));
      r_.sse_m128 = _mm_or_ps(min_where_ordered, nan_elsewhere);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* take_a = (a < b) | (a is NaN) */
      SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) take_a;
      SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) a_below_b =
        vec_cmpgt(b_.altivec_f32, a_.altivec_f32);
      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        take_a = vec_orc(a_below_b, vec_cmpeq(a_.altivec_f32, a_.altivec_f32));
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) a_is_number =
          vec_cmpeq(a_.altivec_f32, a_.altivec_f32);
        take_a = vec_or(a_below_b, vec_nor(a_is_number, a_is_number));
      #endif
      r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, take_a);
    #else
      const size_t lane_count = sizeof(r_.f32) / sizeof(r_.f32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f32[lane] = (simde_math_isnan(a_.f32[lane]) || (a_.f32[lane] < b_.f32[lane])) ? a_.f32[lane] : b_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_min(a, b) simde_wasm_f32x4_min((a), (b))
#endif
/* Lane-wise minimum of the 64-bit float lanes of a and b.
 *
 * The x86 paths produce SIMDE_MATH_NAN in every lane where a and b
 * compare unordered and _mm_min_pd(a, b) elsewhere.  The NEON path
 * defers to vminq_f64.  The AltiVec and portable paths pick a when a is
 * NaN or a < b, and b otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_min (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_min(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128d =
        _mm_blendv_pd(
          _mm_set1_pd(SIMDE_MATH_NAN),
          _mm_min_pd(a_.sse_m128d, b_.sse_m128d),
          _mm_cmpord_pd(a_.sse_m128d, b_.sse_m128d)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* All-ones where neither a nor b is NaN. */
      __m128d ordered = _mm_cmpord_pd(a_.sse_m128d, b_.sse_m128d);
      __m128d min_where_ordered = _mm_and_pd(ordered, _mm_min_pd(a_.sse_m128d, b_.sse_m128d));
      __m128d nan_elsewhere = _mm_andnot_pd(ordered, _mm_set1_pd(SIMDE_MATH_NAN));
      r_.sse_m128d = _mm_or_pd(min_where_ordered, nan_elsewhere);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      /* Select a where (a < b) | (a is NaN), b elsewhere. */
      r_.altivec_f64 =
        vec_sel(
          b_.altivec_f64,
          a_.altivec_f64,
          vec_orc(
            vec_cmpgt(b_.altivec_f64, a_.altivec_f64),
            vec_cmpeq(a_.altivec_f64, a_.altivec_f64)
          )
        );
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);
    #else
      const size_t lane_count = sizeof(r_.f64) / sizeof(r_.f64[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.f64[lane] = (simde_math_isnan(a_.f64[lane]) || (a_.f64[lane] < b_.f64[lane])) ? a_.f64[lane] : b_.f64[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_min(a, b) simde_wasm_f64x2_min((a), (b))
#endif
/* max */
/* Lane-wise maximum of the signed 8-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_max(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_max_epi8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Select a where a > b, b elsewhere. */
      __m128i a_is_greater = _mm_cmpgt_epi8(a_.sse_m128i, b_.sse_m128i);
      __m128i from_a = _mm_and_si128(a_is_greater, a_.sse_m128i);
      __m128i from_b = _mm_andnot_si128(a_is_greater, b_.sse_m128i);
      r_.sse_m128i = _mm_or_si128(from_a, from_b);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* The vector comparison yields a per-lane mask used as a bitwise select. */
      __typeof__(r_.i8) a_is_greater = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8);
      r_.i8 = (a_is_greater & a_.i8) | (~a_is_greater & b_.i8);
    #else
      const size_t lane_count = sizeof(r_.i8) / sizeof(r_.i8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i8[lane] = (a_.i8[lane] > b_.i8[lane]) ? a_.i8[lane] : b_.i8[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_max(a, b) simde_wasm_i8x16_max((a), (b))
#endif
/* Lane-wise maximum of the signed 16-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_max(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_max_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* The vector comparison yields a per-lane mask used as a bitwise select. */
      __typeof__(r_.i16) a_is_greater = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16);
      r_.i16 = (a_is_greater & a_.i16) | (~a_is_greater & b_.i16);
    #else
      const size_t lane_count = sizeof(r_.i16) / sizeof(r_.i16[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i16[lane] = (a_.i16[lane] > b_.i16[lane]) ? a_.i16[lane] : b_.i16[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_max(a, b) simde_wasm_i16x8_max((a), (b))
#endif
/* Lane-wise maximum of the signed 32-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_max(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_max_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Select a where a > b, b elsewhere. */
      __m128i a_is_greater = _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i);
      __m128i from_a = _mm_and_si128(a_is_greater, a_.sse_m128i);
      __m128i from_b = _mm_andnot_si128(a_is_greater, b_.sse_m128i);
      r_.sse_m128i = _mm_or_si128(from_a, from_b);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* The vector comparison yields a per-lane mask used as a bitwise select. */
      __typeof__(r_.i32) a_is_greater = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32);
      r_.i32 = (a_is_greater & a_.i32) | (~a_is_greater & b_.i32);
    #else
      const size_t lane_count = sizeof(r_.i32) / sizeof(r_.i32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i32[lane] = (a_.i32[lane] > b_.i32[lane]) ? a_.i32[lane] : b_.i32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_max(a, b) simde_wasm_i32x4_max((a), (b))
#endif
/* Lane-wise maximum of the unsigned 8-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_max(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_max_epu8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* The vector comparison yields a per-lane mask used as a bitwise select. */
      __typeof__(r_.u8) a_is_greater = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8);
      r_.u8 = (a_is_greater & a_.u8) | (~a_is_greater & b_.u8);
    #else
      const size_t lane_count = sizeof(r_.u8) / sizeof(r_.u8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.u8[lane] = (a_.u8[lane] > b_.u8[lane]) ? a_.u8[lane] : b_.u8[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_max(a, b) simde_wasm_u8x16_max((a), (b))
#endif
/* Lane-wise maximum of the unsigned 16-bit integer lanes of a and b.
 *
 * SSE2 has no unsigned 16-bit maximum, so that path computes
 * b + saturating_sub(a, b): the saturating difference is (a - b) where
 * a > b and 0 elsewhere, which yields a or b respectively. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_max(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_max_epu16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
      /* Use the private view of b, like every other branch, instead of
       * passing the public simde_v128_t to the intrinsic. */
      r_.sse_m128i = _mm_add_epi16(b_.sse_m128i, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.u16) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16);
      r_.u16 = (m & a_.u16) | (~m & b_.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_max(a, b) simde_wasm_u16x8_max((a), (b))
#endif
/* Lane-wise maximum of the unsigned 32-bit integer lanes of a and b.
 *
 * SSE2 only offers a signed 32-bit compare.  The signed and unsigned
 * orderings of a and b agree when their top bits match and are opposite
 * when they differ, so that path XORs the signed "a > b" mask with the
 * broadcast top bit of (a ^ b) to obtain the unsigned "a > b" mask. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_max(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_max_epu32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-886057227 */
      __m128i m =
        _mm_xor_si128(
          _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i),
          _mm_srai_epi32(_mm_xor_si128(a_.sse_m128i, b_.sse_m128i), 31)
        );
      r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i));
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.u32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32);
      r_.u32 = (m & a_.u32) | (~m & b_.u32);
    #else
      /* Bound the loop by the member that is actually indexed. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_max(a, b) simde_wasm_u32x4_max((a), (b))
#endif
/* Lane-wise maximum of the 32-bit float lanes of a and b.
 *
 * The SSE4.1 path produces SIMDE_MATH_NANF in every lane where a and b
 * compare unordered and _mm_max_ps(a, b) elsewhere.  The NEON path
 * defers to vmaxq_f32.  Every other path picks a when a is NaN or
 * a > b, and b otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_max(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128 = _mm_blendv_ps(
          _mm_set1_ps(SIMDE_MATH_NANF),
          _mm_max_ps(a_.sse_m128, b_.sse_m128),
          _mm_cmpord_ps(a_.sse_m128, b_.sse_m128));
    #elif defined(SIMDE_X86_SSE_NATIVE)
      /* SSE4.1 is handled by the branch above, so only the and/andnot/or
       * select is needed here.  m = (a is NaN) | (a > b). */
      __m128 m = _mm_or_ps(_mm_cmpneq_ps(a_.sse_m128, a_.sse_m128), _mm_cmpgt_ps(a_.sse_m128, b_.sse_m128));
      r_.sse_m128 =
        _mm_or_ps(
          _mm_and_ps(m, a_.sse_m128),
          _mm_andnot_ps(m, b_.sse_m128)
        );
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_f32 =
        vec_sel(
          b_.altivec_f32,
          a_.altivec_f32,
          vec_orc(
            vec_cmpgt(a_.altivec_f32, b_.altivec_f32),
            vec_cmpeq(a_.altivec_f32, a_.altivec_f32)
          )
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) cmpres = vec_cmpeq(a_.altivec_f32, a_.altivec_f32);
      r_.altivec_f32 =
        vec_sel(
          b_.altivec_f32,
          a_.altivec_f32,
          vec_or(
            vec_cmpgt(a_.altivec_f32, b_.altivec_f32),
            vec_nor(cmpres, cmpres)
          )
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      int32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), (a_.f32 != a_.f32) | (a_.f32 > b_.f32));
      r_.f32 =
        HEDLEY_REINTERPRET_CAST(
          __typeof__(r_.f32),
          (
            ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32)) |
            (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32))
          )
        );
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = (simde_math_isnan(a_.f32[i]) || (a_.f32[i] > b_.f32[i])) ? a_.f32[i] : b_.f32[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_max(a, b) simde_wasm_f32x4_max((a), (b))
#endif
/* Lane-wise maximum of the 64-bit float lanes of a and b.
 *
 * The SSE4.1 path produces SIMDE_MATH_NAN in every lane where a and b
 * compare unordered and _mm_max_pd(a, b) elsewhere.  The NEON path
 * defers to vmaxq_f64.  Every other path picks a when a is NaN or
 * a > b, and b otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_max (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_max(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128d = _mm_blendv_pd(
          _mm_set1_pd(SIMDE_MATH_NAN),
          _mm_max_pd(a_.sse_m128d, b_.sse_m128d),
          _mm_cmpord_pd(a_.sse_m128d, b_.sse_m128d));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE4.1 is handled by the branch above, so only the and/andnot/or
       * select is needed here.  m = (a is NaN) | (a > b). */
      __m128d m = _mm_or_pd(_mm_cmpneq_pd(a_.sse_m128d, a_.sse_m128d), _mm_cmpgt_pd(a_.sse_m128d, b_.sse_m128d));
      r_.sse_m128d =
        _mm_or_pd(
          _mm_and_pd(m, a_.sse_m128d),
          _mm_andnot_pd(m, b_.sse_m128d)
        );
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_f64 =
        vec_sel(
          b_.altivec_f64,
          a_.altivec_f64,
          vec_orc(
            vec_cmpgt(a_.altivec_f64, b_.altivec_f64),
            vec_cmpeq(a_.altivec_f64, a_.altivec_f64)
          )
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) cmpres = vec_cmpeq(a_.altivec_f64, a_.altivec_f64);
      r_.altivec_f64 =
        vec_sel(
          b_.altivec_f64,
          a_.altivec_f64,
          vec_or(
            vec_cmpgt(a_.altivec_f64, b_.altivec_f64),
            vec_nor(cmpres, cmpres)
          )
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      int64_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), (a_.f64 != a_.f64) | (a_.f64 > b_.f64));
      r_.f64 =
        HEDLEY_REINTERPRET_CAST(
          __typeof__(r_.f64),
          (
            ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f64)) |
            (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f64))
          )
        );
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = (simde_math_isnan(a_.f64[i]) || (a_.f64[i] > b_.f64[i])) ? a_.f64[i] : b_.f64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_max(a, b) simde_wasm_f64x2_max((a), (b))
#endif
/* add_sat */
/* Lane-wise saturating sum of the signed 8-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_add_sat (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_add_sat(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_adds_epi8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(a_.u8) wrapped, limit, keep;
      /* Sum with wrap-around, computed on the unsigned view. */
      wrapped = a_.u8 + b_.u8;
      /* 0x7F where the top bit of a is clear, 0x80 where it is set. */
      limit = (a_.u8 >> 7) + INT8_MAX;
      /* All-ones where the top bit of (limit ^ b) | ~(b ^ wrapped) is set. */
      keep = HEDLEY_REINTERPRET_CAST(__typeof__(keep), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (limit ^ b_.u8) | ~(b_.u8 ^ wrapped)) < 0);
      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (wrapped & keep) | (limit & ~keep));
    #else
      const size_t lane_count = sizeof(r_.i8) / sizeof(r_.i8[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i8[lane] = simde_math_adds_i8(a_.i8[lane], b_.i8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_add_sat(a, b) simde_wasm_i8x16_add_sat((a), (b))
#endif
/* Lane-wise saturating sum of the signed 16-bit integer lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_add_sat (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_add_sat(a, b);
  #else
    simde_v128_private r_;
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_adds_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(a_.u16) wrapped, limit, keep;
      /* Sum with wrap-around, computed on the unsigned view. */
      wrapped = a_.u16 + b_.u16;
      /* 0x7FFF where the top bit of a is clear, 0x8000 where it is set. */
      limit = (a_.u16 >> 15) + INT16_MAX;
      /* All-ones where the top bit of (limit ^ b) | ~(b ^ wrapped) is set. */
      keep = HEDLEY_REINTERPRET_CAST(__typeof__(keep), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (limit ^ b_.u16) | ~(b_.u16 ^ wrapped)) < 0);
      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (wrapped & keep) | (limit & ~keep));
    #else
      const size_t lane_count = sizeof(r_.i16) / sizeof(r_.i16[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        r_.i16[lane] = simde_math_adds_i16(a_.i16[lane], b_.i16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_add_sat(a, b) simde_wasm_i16x8_add_sat((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_add_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating addition of the sixteen unsigned 8-bit lanes of `a` and `b`. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_add_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_adds_epu8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* A lane whose modular sum is below `a` has wrapped; OR-ing in the
       * comparison mask sets every bit of those lanes. */
      const __typeof__(r_.u8) sum = a_.u8 + b_.u8;
      r_.u8 = sum | HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), sum < a_.u8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; lane++) {
        r_.u8[lane] = simde_math_adds_u8(a_.u8[lane], b_.u8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_add_sat(a, b) simde_wasm_u8x16_add_sat((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_add_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating addition of the eight unsigned 16-bit lanes of `a` and `b`. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_add_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_adds_epu16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* A lane whose modular sum is below `a` has wrapped; OR-ing in the
       * comparison mask sets every bit of those lanes. */
      const __typeof__(r_.u16) sum = a_.u16 + b_.u16;
      r_.u16 = sum | HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), sum < a_.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; lane++) {
        r_.u16[lane] = simde_math_adds_u16(a_.u16[lane], b_.u16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_add_sat(a, b) simde_wasm_u16x8_add_sat((a), (b))
#endif
/* avgr */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_avgr (simde_v128_t a, simde_v128_t b) {
  /* Rounding average of the sixteen unsigned 8-bit lanes:
   * each result lane is (a + b + 1) >> 1, computed without overflow. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_avgr(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_avg_epu8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Rounding halving add: (a + b + 1) >> 1 per lane. */
      r_.neon_u8 = vrhaddq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        /* The operands promote to int, so the sum cannot wrap; the cast
         * makes the narrowing back to 8 bits explicit. */
        r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.u8[i] + b_.u8[i] + 1) >> 1);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_avgr(a, b) simde_wasm_u8x16_avgr((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_avgr (simde_v128_t a, simde_v128_t b) {
  /* Rounding average of the eight unsigned 16-bit lanes:
   * each result lane is (a + b + 1) >> 1, computed without overflow. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_avgr(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_avg_epu16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Rounding halving add: (a + b + 1) >> 1 per lane. */
      r_.neon_u16 = vrhaddq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        /* The operands promote to int, so the sum cannot wrap; the cast
         * makes the narrowing back to 16 bits explicit. */
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] + b_.u16[i] + 1) >> 1);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_avgr(a, b) simde_wasm_u16x8_avgr((a), (b))
#endif
/* sub_sat */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_sub_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating subtraction (a - b) of the sixteen signed 8-bit lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_sub_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_subs_epi8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_subs(a_.altivec_i8, b_.altivec_i8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* `bound` is the value a lane saturates to: the comparison mask
       * XOR INT8_MAX, i.e. INT8_MIN where b > a and INT8_MAX elsewhere.
       * When the sign of the raw difference disagrees with the sign of
       * `bound`, the lane is replaced by `bound`. */
      const __typeof__(r_.i8) bound = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (b_.i8 > a_.i8) ^ INT8_MAX);
      const __typeof__(r_.i8) raw = a_.i8 - b_.i8;
      const __typeof__(r_.i8) sign_differs = bound ^ raw;
      const __typeof__(r_.i8) use_bound = sign_differs >> 7;
      r_.i8 = (bound & use_bound) | (raw & ~use_bound);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; lane++) {
        r_.i8[lane] = simde_math_subs_i8(a_.i8[lane], b_.i8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_sub_sat(a, b) simde_wasm_i8x16_sub_sat((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_sub_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating subtraction (a - b) of the eight signed 16-bit lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_sub_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_subs_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_subs(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* `bound` is the value a lane saturates to: the comparison mask
       * XOR INT16_MAX, i.e. INT16_MIN where b > a and INT16_MAX elsewhere.
       * When the sign of the raw difference disagrees with the sign of
       * `bound`, the lane is replaced by `bound`. */
      const __typeof__(r_.i16) bound = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (b_.i16 > a_.i16) ^ INT16_MAX);
      const __typeof__(r_.i16) raw = a_.i16 - b_.i16;
      const __typeof__(r_.i16) sign_differs = bound ^ raw;
      const __typeof__(r_.i16) use_bound = sign_differs >> 15;
      r_.i16 = (bound & use_bound) | (raw & ~use_bound);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        r_.i16[lane] = simde_math_subs_i16(a_.i16[lane], b_.i16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_sub_sat(a, b) simde_wasm_i16x8_sub_sat((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_sub_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating subtraction (a - b) of the sixteen unsigned 8-bit lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_sub_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_subs_epu8(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* A lane whose modular difference exceeds `a` has wrapped; AND-ing
       * with the comparison mask clears those lanes to zero. */
      const __typeof__(r_.u8) diff = a_.u8 - b_.u8;
      r_.u8 = diff & HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), diff <= a_.u8);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; lane++) {
        r_.u8[lane] = simde_math_subs_u8(a_.u8[lane], b_.u8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_sub_sat(a, b) simde_wasm_u8x16_sub_sat((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_sub_sat (simde_v128_t a, simde_v128_t b) {
  /* Saturating subtraction (a - b) of the eight unsigned 16-bit lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_sub_sat(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* A lane whose modular difference exceeds `a` has wrapped; AND-ing
       * with the comparison mask clears those lanes to zero. */
      const __typeof__(r_.u16) diff = a_.u16 - b_.u16;
      r_.u16 = diff & HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), diff <= a_.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; lane++) {
        r_.u16[lane] = simde_math_subs_u16(a_.u16[lane], b_.u16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_sub_sat(a, b) simde_wasm_u16x8_sub_sat((a), (b))
#endif
/* pmin */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_pmin (simde_v128_t a, simde_v128_t b) {
  /* Pseudo-minimum of four f32 lanes: each result lane is
   * `(b < a) ? b : a`, as spelled out in the portable fallback. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_pmin(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Operands are deliberately passed as (b, a). */
      r_.sse_m128 = _mm_min_ps(b_.sse_m128, a_.sse_m128);
    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Select b where b < a, otherwise a. */
      r_.neon_f32 = vbslq_f32(vcltq_f32(b_.neon_f32, a_.neon_f32), b_.neon_f32, a_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Select b where a > b, otherwise a. */
      r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; lane++) {
        r_.f32[lane] = (b_.f32[lane] < a_.f32[lane]) ? b_.f32[lane] : a_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_pmin(a, b) simde_wasm_f32x4_pmin((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_pmin (simde_v128_t a, simde_v128_t b) {
  /* Pseudo-minimum of two f64 lanes: each result lane is
   * `(b < a) ? b : a`, as spelled out in the portable fallback.
   *
   * Every backend below must operate on the 64-bit float view of the
   * vector; using an f32 member would compare and select 32-bit halves. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_pmin(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Operands are deliberately passed as (b, a). */
      r_.sse_m128d = _mm_min_pd(b_.sse_m128d, a_.sse_m128d);
    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* vminq_f64 yields a float64x2_t, so it is stored via neon_f64. */
      r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Select b where b < a, otherwise a. */
      r_.neon_f64 =
        vbslq_f64(
          vcltq_f64(b_.neon_f64, a_.neon_f64),
          b_.neon_f64,
          a_.neon_f64
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Select b where a > b, otherwise a, on the double-precision lanes. */
      r_.altivec_f64 =
        vec_sel(
          a_.altivec_f64,
          b_.altivec_f64,
          vec_cmpgt(a_.altivec_f64, b_.altivec_f64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = (b_.f64[i] < a_.f64[i]) ? b_.f64[i] : a_.f64[i];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_pmin(a, b) simde_wasm_f64x2_pmin((a), (b))
#endif
/* pmax */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_pmax (simde_v128_t a, simde_v128_t b) {
  /* Pseudo-maximum of four f32 lanes: each result lane is
   * `(a < b) ? b : a`, as spelled out in the portable fallback. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_pmax(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Operands are deliberately passed as (b, a). */
      r_.sse_m128 = _mm_max_ps(b_.sse_m128, a_.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 =
        vbslq_f32(
          vcltq_f32(a_.neon_f32, b_.neon_f32),
          b_.neon_f32,
          a_.neon_f32
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
      r_.altivec_f32 =
        vec_sel(
          a_.altivec_f32,
          b_.altivec_f32,
          vec_cmplt(a_.altivec_f32, b_.altivec_f32)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Bitwise blend: lanes where a < b take the bits of b, the rest
       * keep the bits of a. */
      int32_t SIMDE_VECTOR(16) take_b = HEDLEY_REINTERPRET_CAST(__typeof__(take_b), a_.f32 < b_.f32);
      r_.f32 =
        HEDLEY_REINTERPRET_CAST(
          __typeof__(r_.f32),
          (
            ( take_b & HEDLEY_REINTERPRET_CAST(__typeof__(take_b), b_.f32)) |
            (~take_b & HEDLEY_REINTERPRET_CAST(__typeof__(take_b), a_.f32))
          )
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; lane++) {
        r_.f32[lane] = (a_.f32[lane] < b_.f32[lane]) ? b_.f32[lane] : a_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_pmax(a, b) simde_wasm_f32x4_pmax((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_pmax (simde_v128_t a, simde_v128_t b) {
  /* Pseudo-maximum of two f64 lanes: each result lane is
   * `(a < b) ? b : a`, as spelled out in the portable fallback. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_pmax(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Operands are deliberately passed as (b, a). */
      r_.sse_m128d = _mm_max_pd(b_.sse_m128d, a_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 =
        vbslq_f64(
          vcltq_f64(a_.neon_f64, b_.neon_f64),
          b_.neon_f64,
          a_.neon_f64
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 =
        vec_sel(
          a_.altivec_f64,
          b_.altivec_f64,
          vec_cmplt(a_.altivec_f64, b_.altivec_f64)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Bitwise blend: lanes where a < b take the bits of b, the rest
       * keep the bits of a. */
      int64_t SIMDE_VECTOR(16) take_b = HEDLEY_REINTERPRET_CAST(__typeof__(take_b), a_.f64 < b_.f64);
      r_.f64 =
        HEDLEY_REINTERPRET_CAST(
          __typeof__(r_.f64),
          (
            ( take_b & HEDLEY_REINTERPRET_CAST(__typeof__(take_b), b_.f64)) |
            (~take_b & HEDLEY_REINTERPRET_CAST(__typeof__(take_b), a_.f64))
          )
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; lane++) {
        r_.f64[lane] = (a_.f64[lane] < b_.f64[lane]) ? b_.f64[lane] : a_.f64[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_pmax(a, b) simde_wasm_f64x2_pmax((a), (b))
#endif
/* div */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_div (simde_v128_t a, simde_v128_t b) {
  /* Lane-wise division a / b over four f32 lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_div(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128 = _mm_div_ps(a_.sse_m128, b_.sse_m128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f32 = a_.f32 / b_.f32;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; lane++) {
        r_.f32[lane] = a_.f32[lane] / b_.f32[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_div(a, b) simde_wasm_f32x4_div((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_div (simde_v128_t a, simde_v128_t b) {
  /* Lane-wise division a / b over two f64 lanes. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_div(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_div_pd(a_.sse_m128d, b_.sse_m128d);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.f64 = a_.f64 / b_.f64;
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; lane++) {
        r_.f64[lane] = a_.f64[lane] / b_.f64[lane];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_div(a, b) simde_wasm_f64x2_div((a), (b))
#endif
/* shuffle */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_shuffle (
    simde_v128_t a, simde_v128_t b,
    const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7,
    const int c8, const int c9, const int c10, const int c11, const int c12, const int c13, const int c14, const int c15) {
  /* Portable byte shuffle: selector values below 16 pick that lane of `a`;
   * any other value picks lane (selector & 15) of `b`. */
  const int selectors[16] = {
    c0, c1,  c2,  c3,  c4,  c5,  c6,  c7,
    c8, c9, c10, c11, c12, c13, c14, c15
  };
  simde_v128_private a_ = simde_v128_to_private(a);
  simde_v128_private b_ = simde_v128_to_private(b);
  simde_v128_private r_;

  for (size_t lane = 0 ; lane < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; lane++) {
    const int sel = selectors[lane];
    r_.i8[lane] = (sel < 16) ? a_.i8[sel] : b_.i8[sel & 15];
  }

  return simde_v128_from_private(r_);
}

/* When a native intrinsic or a compiler shuffle builtin is available, the
 * function above is shadowed by a macro of the same name. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define \
    simde_wasm_i8x16_shuffle( \
        a, b, \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      wasm_i8x16_shuffle( \
          a, b, \
          c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
          c8, c9, c10, c11, c12, c13, c14, c15)
#elif defined(SIMDE_SHUFFLE_VECTOR_)
  #define \
    simde_wasm_i8x16_shuffle( \
        a, b, \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      (__extension__ ({ \
        HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(8, 16, \
            HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), a), \
            HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), b), \
            c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
            c8, c9, c10, c11, c12, c13, c14, c15)); \
      }))
#endif

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define \
    wasm_i8x16_shuffle(a, b, \
        c0, c1,  c2,  c3,  c4,  c5,  c6,  c7, \
        c8, c9, c10, c11, c12, c13, c14, c15) \
      simde_wasm_i8x16_shuffle((a), (b), \
          (c0), (c1),  (c2),  (c3),  (c4),  (c5),  (c6),  (c7), \
          (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_shuffle (
    simde_v128_t a, simde_v128_t b,
    const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7) {
  /* Portable 16-bit lane shuffle: selector values below 8 pick that lane
   * of `a`; any other value picks lane (selector & 7) of `b`. */
  const int selectors[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };
  simde_v128_private a_ = simde_v128_to_private(a);
  simde_v128_private b_ = simde_v128_to_private(b);
  simde_v128_private r_;

  for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
    const int sel = selectors[lane];
    r_.i16[lane] = (sel < 8) ? a_.i16[sel] : b_.i16[sel & 7];
  }

  return simde_v128_from_private(r_);
}

/* When a native intrinsic or a compiler shuffle builtin is available, the
 * function above is shadowed by a macro of the same name. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define \
    simde_wasm_i16x8_shuffle( \
        a, b, \
        c0, c1, c2, c3, c4, c5, c6, c7) \
      wasm_i16x8_shuffle( \
          a, b, \
          c0, c1, c2, c3, c4, c5, c6, c7)
#elif defined(SIMDE_SHUFFLE_VECTOR_)
  #define \
    simde_wasm_i16x8_shuffle( \
        a, b, \
        c0, c1, c2, c3, c4, c5, c6, c7) \
      (__extension__ ({ \
        HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(16, 16, \
            HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), a), \
            HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), b), \
            c0, c1, c2, c3, c4, c5, c6, c7)); \
      }))
#endif

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define \
    wasm_i16x8_shuffle(a, b, \
        c0, c1, c2, c3, c4, c5, c6, c7) \
      simde_wasm_i16x8_shuffle((a), (b), \
          (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_shuffle (
    simde_v128_t a, simde_v128_t b,
    const int c0, const int c1, const int c2, const int c3) {
  /* Portable 32-bit lane shuffle: selector values below 4 pick that lane
   * of `a`; any other value picks lane (selector & 3) of `b`. */
  const int selectors[4] = { c0, c1, c2, c3 };
  simde_v128_private a_ = simde_v128_to_private(a);
  simde_v128_private b_ = simde_v128_to_private(b);
  simde_v128_private r_;

  for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
    const int sel = selectors[lane];
    r_.i32[lane] = (sel < 4) ? a_.i32[sel] : b_.i32[sel & 3];
  }

  return simde_v128_from_private(r_);
}

/* When a native intrinsic or a compiler shuffle builtin is available, the
 * function above is shadowed by a macro of the same name. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define \
    simde_wasm_i32x4_shuffle( \
        a, b, \
        c0, c1, c2, c3) \
      wasm_i32x4_shuffle( \
          a, b, \
          c0, c1, c2, c3)
#elif defined(SIMDE_SHUFFLE_VECTOR_)
  #define \
    simde_wasm_i32x4_shuffle( \
        a, b, \
        c0, c1, c2, c3) \
      (__extension__ ({ \
        HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(32, 16, \
            HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), a), \
            HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), b), \
            c0, c1, c2, c3)); \
      }))
#endif

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define \
    wasm_i32x4_shuffle(a, b, \
        c0, c1, c2, c3) \
      simde_wasm_i32x4_shuffle((a), (b), \
          (c0), (c1), (c2), (c3))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_shuffle (
    simde_v128_t a, simde_v128_t b,
    const int c0, const int c1) {
  /* Portable 64-bit lane shuffle: selector values below 2 pick that lane
   * of `a`; any other value picks lane (selector & 1) of `b`. */
  const int selectors[2] = { c0, c1 };
  simde_v128_private a_ = simde_v128_to_private(a);
  simde_v128_private b_ = simde_v128_to_private(b);
  simde_v128_private r_;

  for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
    const int sel = selectors[lane];
    r_.i64[lane] = (sel < 2) ? a_.i64[sel] : b_.i64[sel & 1];
  }

  return simde_v128_from_private(r_);
}

/* When a native intrinsic or a compiler shuffle builtin is available, the
 * function above is shadowed by a macro of the same name. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define \
    simde_wasm_i64x2_shuffle( \
        a, b, \
        c0, c1) \
      wasm_i64x2_shuffle( \
          a, b, \
          c0, c1)
#elif defined(SIMDE_SHUFFLE_VECTOR_)
  #define \
    simde_wasm_i64x2_shuffle( \
        a, b, \
        c0, c1) \
      (__extension__ ({ \
        HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(64, 16, \
            HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), a), \
            HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), b), \
            c0, c1)); \
      }))
#endif

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define \
    wasm_i64x2_shuffle(a, b, \
        c0, c1) \
      simde_wasm_i64x2_shuffle((a), (b), \
          (c0), (c1))
#endif
/* swizzle */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_swizzle (simde_v128_t a, simde_v128_t b) {
  /* Run-time byte permute: result lane i is a[b[i]] when the unsigned
   * index b[i] is at most 15, and zero otherwise (see the fallback). */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_swizzle(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Two 8-byte table lookups over the split halves of `a`. */
      int8x8x2_t tmp = { { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } };
      r_.neon_i8 =
        vcombine_s8(
          vtbl2_s8(tmp, vget_low_s8(b_.neon_i8)),
          vtbl2_s8(tmp, vget_high_s8(b_.neon_i8))
        );
    #elif defined(SIMDE_X86_SSSE3_NATIVE)
      /* https://github.com/WebAssembly/simd/issues/68#issuecomment-470825324 */
      r_.sse_m128i = _mm_shuffle_epi8(a_.sse_m128i, _mm_adds_epu8(_mm_set1_epi8(0x70), b_.sse_m128i));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Permute first, then mask off the lanes whose index exceeds 15. */
      r_.altivec_i8 = vec_perm(a_.altivec_i8, a_.altivec_i8, b_.altivec_u8);
      r_.altivec_i8 = vec_and(r_.altivec_i8, vec_cmple(b_.altivec_u8, vec_splat_u8(15)));
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; lane++) {
        r_.i8[lane] = (b_.u8[lane] > 15) ? INT8_C(0) : a_.i8[b_.u8[lane]];
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_swizzle(a, b) simde_wasm_i8x16_swizzle((a), (b))
#endif
/* narrow */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) {
  /* Narrows sixteen signed 16-bit values (the eight lanes of `a` followed
   * by the eight lanes of `b`) to signed 8-bit lanes, clamping each value
   * to [INT8_MIN, INT8_MAX]. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_narrow_i16x8(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_packs_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Concatenate a and b into one 16-lane vector, clamp it against the
       * lower and then the upper bound, and convert to 8-bit lanes. */
      int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
      const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN };
      const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX };

      int16_t m SIMDE_VECTOR(32);
      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min);
      v = (v & ~m) | (min & m);

      /* Same explicit mask cast as the lower-bound step above. */
      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max);
      v = (v & ~m) | (max & m);

      SIMDE_CONVERT_VECTOR_(r_.i8, v);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        /* The first half of the result comes from a, the second from b. */
        int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7];
        r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i8x16_narrow_i16x8(a, b) simde_wasm_i8x16_narrow_i16x8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) {
  /* Narrows eight signed 32-bit values (the four lanes of `a` followed by
   * the four lanes of `b`) to signed 16-bit lanes, clamping each value to
   * [INT16_MIN, INT16_MAX]. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_narrow_i32x4(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Concatenate a and b into one 8-lane vector, clamp it against the
       * lower and then the upper bound, and convert to 16-bit lanes. */
      int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7);
      const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN };
      const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX };

      int32_t m SIMDE_VECTOR(32);
      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min);
      v = (v & ~m) | (min & m);

      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max);
      v = (v & ~m) | (max & m);

      SIMDE_CONVERT_VECTOR_(r_.i16, v);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        /* The first half of the result comes from a, the second from b. */
        int32_t v = (lane < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[lane] : b_.i32[lane & 3];
        r_.i16[lane] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_narrow_i32x4(a, b) simde_wasm_i16x8_narrow_i32x4((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) {
  /* Narrows sixteen signed 16-bit values (the eight lanes of `a` followed
   * by the eight lanes of `b`) to unsigned 8-bit lanes, clamping each
   * value to [0, UINT8_MAX]. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_narrow_i16x8(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      #if defined(SIMDE_BUG_CLANG_46840)
        r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16);
      #else
        r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16);
      #endif
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 =
        vcombine_u8(
          vqmovun_s16(a_.neon_i16),
          vqmovun_s16(b_.neon_i16)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_packus_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

      /* Clear negative lanes (assumes >> on a signed lane replicates the
       * sign bit), then set every bit of lanes above UINT8_MAX. */
      v &= ~(v >> 15);
      v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX);

      /* Convert into the unsigned view: lanes are now 0..255 or -1, and
       * -1 converts to UINT8_MAX by modular arithmetic. */
      SIMDE_CONVERT_VECTOR_(r_.u8, v);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        /* The first half of the result comes from a, the second from b. */
        int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7];
        r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u8x16_narrow_i16x8(a, b) simde_wasm_u8x16_narrow_i16x8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) {
  /* Narrows eight signed 32-bit values (the four lanes of `a` followed by
   * the four lanes of `b`) to unsigned 16-bit lanes, clamping each value
   * to [0, UINT16_MAX]. */
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_narrow_i32x4(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      #if defined(SIMDE_BUG_CLANG_46840)
        r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32);
      #else
        r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32);
      #endif
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 =
        vcombine_u16(
          vqmovun_s32(a_.neon_i32),
          vqmovun_s32(b_.neon_i32)
        );
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_packus_epi32(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* No unsigned 32->16 pack here: clear negative lanes, set every bit
       * of lanes above UINT16_MAX, then sign-extend the low 16 bits of each
       * lane before handing them to the signed pack. */
      const __m128i max = _mm_set1_epi32(UINT16_MAX);
      const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a_.sse_m128i, 31), a_.sse_m128i);
      const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b_.sse_m128i, 31), b_.sse_m128i);
      r_.sse_m128i =
        _mm_packs_epi32(
          _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16),
          _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16)
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7);

      /* Clear negative lanes (assumes >> on a signed lane replicates the
       * sign bit), then set every bit of lanes above UINT16_MAX. */
      v &= ~(v >> 31);
      v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX);

      /* Convert into the unsigned view: lanes are now 0..65535 or -1, and
       * -1 converts to UINT16_MAX by modular arithmetic. */
      SIMDE_CONVERT_VECTOR_(r_.u16, v);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        /* The first half of the result comes from a, the second from b. */
        int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3];
        r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v));
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}

#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_narrow_i32x4(a, b) simde_wasm_u16x8_narrow_i32x4((a), (b))
#endif
/* demote */
/* Converts the two f64 lanes of `a` to f32 and stores them in result lanes
 * 0 and 1; result lanes 2 and 3 are set to zero (see the portable fallback
 * at the end of the function for the reference behaviour). */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_demote_f64x2_zero (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_f32x4_demote_f64x2_zero(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.sse_m128 = _mm_cvtpd_ps(a_.sse_m128d);
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Two converted floats in the low half, two zeros in the high half. */
r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f));
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
/* NOTE(review): presumably vec_floate leaves the converted values in
 * alternating 32-bit slots, which the pack/permute below compacts and
 * pads with zeros -- confirm against the AltiVec documentation. */
r_.altivec_f32 = vec_floate(a_.altivec_f64);
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
/* Pack the vector (viewed as 64-bit lanes) together with a zero vector. */
r_.altivec_f32 =
HEDLEY_REINTERPRET_CAST(
SIMDE_POWER_ALTIVEC_VECTOR(float),
vec_pack(
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_f32),
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0))
)
);
#else
/* Byte permute selecting 32-bit words 0 and 2 of the converted vector and
 * words 0 and 2 (indices 4 and 6 of the concatenation) of a zero vector. */
const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
0x00, 0x01, 0x02, 0x03, /* 0 */
0x08, 0x09, 0x0a, 0x0b, /* 2 */
0x10, 0x11, 0x12, 0x13, /* 4 */
0x18, 0x19, 0x1a, 0x1b /* 6 */
};
r_.altivec_f32 = vec_perm(r_.altivec_f32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_splat_s32(0)), perm);
#endif
#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector)
/* Convert to a 2-lane float vector, then append the 2-lane zero vector `z`. */
float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f };
r_.f32 = __builtin_shufflevector(__builtin_convertvector(a_.f64, __typeof__(z)), z, 0, 1, 2, 3);
#else
/* Portable reference: demote both doubles, zero the upper two lanes. */
r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]);
r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]);
r_.f32[2] = SIMDE_FLOAT32_C(0.0);
r_.f32[3] = SIMDE_FLOAT32_C(0.0);
#endif
return simde_v128_from_private(r_);
#endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_demote_f64x2_zero(a) simde_wasm_f32x4_demote_f64x2_zero((a))
#endif
/* extend_low */
/* Sign-extend i8 lanes 0..7 of `a` into the eight i16 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_extend_low_i8x16 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_extend_low_i8x16(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmovl_s8(vget_low_s8(a_.neon_i8));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepi8_epi16(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each low byte into a 16-bit lane, then shift right
       * arithmetically by 8 so the upper byte becomes the sign extension. */
      r_.sse_m128i = _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Same duplicate-then-arithmetic-shift approach as the SSE2 path. */
      r_.altivec_i16 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergeh(a_.altivec_i8, a_.altivec_i8)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8))
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the low eight lanes into a 64-bit vector and widen it in one step. */
      const int8_t low_half SIMDE_VECTOR(8) = {
        a_.i8[0], a_.i8[1], a_.i8[2], a_.i8[3],
        a_.i8[4], a_.i8[5], a_.i8[6], a_.i8[7]
      };

      SIMDE_CONVERT_VECTOR_(r_.i16, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        r_.i16[lane] = HEDLEY_STATIC_CAST(int16_t, a_.i8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_extend_low_i8x16(a) simde_wasm_i16x8_extend_low_i8x16((a))
#endif
/* Sign-extend i16 lanes 0..3 of `a` into the four i32 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_extend_low_i16x8 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_extend_low_i16x8(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepi16_epi32(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each low 16-bit lane into a 32-bit lane, then shift right
       * arithmetically by 16 to produce the sign extension. */
      r_.sse_m128i = _mm_srai_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Same duplicate-then-arithmetic-shift approach as the SSE2 path. */
      r_.altivec_i32 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergeh(a_.altivec_i16, a_.altivec_i16)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the low four lanes into a 64-bit vector and widen it in one step. */
      const int16_t low_half SIMDE_VECTOR(8) = { a_.i16[0], a_.i16[1], a_.i16[2], a_.i16[3] };

      SIMDE_CONVERT_VECTOR_(r_.i32, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
        r_.i32[lane] = HEDLEY_STATIC_CAST(int32_t, a_.i16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extend_low_i16x8(a) simde_wasm_i32x4_extend_low_i16x8((a))
#endif
/* Sign-extend i32 lanes 0..1 of `a` into the two i64 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_extend_low_i32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_extend_low_i32x4(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepi32_epi64(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* (0 > a) yields an all-ones word for negative lanes and zero otherwise;
       * interleaving it after each value supplies the upper 32 bits. */
      r_.sse_m128i = _mm_unpacklo_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Duplicate each low word into a 64-bit lane, then shift right
       * arithmetically by 32. */
      r_.altivec_i64 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergeh(a_.altivec_i32, a_.altivec_i32)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32))
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each value with its (0 > a) sign mask.
       * NOTE(review): this places the value word first in each 64-bit lane;
       * confirm the word order is right for big-endian targets. */
      r_.altivec_i32 =
        vec_mergeh(
          a_.altivec_i32,
          HEDLEY_REINTERPRET_CAST(
            SIMDE_POWER_ALTIVEC_VECTOR(int),
            vec_cmpgt(vec_splat_s32(0), a_.altivec_i32)
          )
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the low two lanes into a 64-bit vector and widen it in one step. */
      const int32_t low_half SIMDE_VECTOR(8) = { a_.i32[0], a_.i32[1] };

      SIMDE_CONVERT_VECTOR_(r_.i64, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
        r_.i64[lane] = HEDLEY_STATIC_CAST(int64_t, a_.i32[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_extend_low_i32x4(a) simde_wasm_i64x2_extend_low_i32x4((a))
#endif
/* Zero-extend u8 lanes 0..7 of `a` into the eight u16 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_extend_low_u8x16 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_extend_low_u8x16(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vmovl_u8(vget_low_u8(a_.neon_u8));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepu8_epi16(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each low byte into a 16-bit lane, then shift right logically
       * by 8 so the upper byte becomes zero. */
      r_.sse_m128i = _mm_srli_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each low byte with a zero byte.
       * NOTE(review): this places the value byte first in each 16-bit lane;
       * confirm the byte order is right for big-endian targets. */
      r_.altivec_i8 = vec_mergeh(a_.altivec_i8, vec_splat_s8(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint8_t low_half SIMDE_VECTOR(8) = {
        a_.u8[0], a_.u8[1], a_.u8[2], a_.u8[3],
        a_.u8[4], a_.u8[5], a_.u8[6], a_.u8[7]
      };

      /* Write through the unsigned view so source and destination signedness
       * agree (matches the extend_high variant). */
      SIMDE_CONVERT_VECTOR_(r_.u16, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; lane++) {
        r_.u16[lane] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extend_low_u8x16(a) simde_wasm_u16x8_extend_low_u8x16((a))
#endif
/* Zero-extend u16 lanes 0..3 of `a` into the four u32 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_extend_low_u16x8 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_extend_low_u16x8(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepu16_epi32(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each low 16-bit lane into a 32-bit lane, then shift right
       * logically by 16 so the upper half becomes zero. */
      r_.sse_m128i = _mm_srli_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each low halfword with a zero halfword.
       * NOTE(review): this places the value halfword first in each 32-bit
       * lane; confirm the order is right for big-endian targets. */
      r_.altivec_i16 = vec_mergeh(a_.altivec_i16, vec_splat_s16(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint16_t low_half SIMDE_VECTOR(8) = { a_.u16[0], a_.u16[1], a_.u16[2], a_.u16[3] };

      /* Write through the unsigned view so source and destination signedness
       * agree (matches the extend_high variant). */
      SIMDE_CONVERT_VECTOR_(r_.u32, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; lane++) {
        r_.u32[lane] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_extend_low_u16x8(a) simde_wasm_u32x4_extend_low_u16x8((a))
#endif
/* Zero-extend u32 lanes 0..1 of `a` into the two u64 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_extend_low_u32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_extend_low_u32x4(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128i = _mm_cvtepu32_epi64(a_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave each low word with a zero word to form the upper 32 bits. */
      r_.sse_m128i = _mm_unpacklo_epi32(a_.sse_m128i, _mm_setzero_si128());
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each low word with a zero word.
       * NOTE(review): this places the value word first in each 64-bit lane;
       * confirm the order is right for big-endian targets. */
      r_.altivec_i32 = vec_mergeh(a_.altivec_i32, vec_splat_s32(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint32_t low_half SIMDE_VECTOR(8) = { a_.u32[0], a_.u32[1] };

      SIMDE_CONVERT_VECTOR_(r_.u64, low_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; lane++) {
        /* Cast to the destination's unsigned type; the previous int64_t cast
         * relied on an implicit signed-to-unsigned conversion. */
        r_.u64[lane] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[lane]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_extend_low_u32x4(a) simde_wasm_u64x2_extend_low_u32x4((a))
#endif
/* promote */
/* Convert f32 lanes 0..1 of `a` to f64 and return them as the two f64 lanes
 * of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_promote_low_f32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_promote_low_f32x4(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_cvtps_pd(a_.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_unpackh(a_.altivec_f32);
    #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector)
      /* Extract lanes 0 and 1 as a two-lane vector, then widen it. */
      r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.f32, a_.f32, 0, 1), __typeof__(r_.f64));
    #else
      /* Portable path: widen each of the two low lanes individually. */
      r_.f64[1] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[1]);
      r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[0]);
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_promote_low_f32x4(a) simde_wasm_f64x2_promote_low_f32x4((a))
#endif
/* Sign-extend i8 lanes 8..15 of `a` into the eight i16 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_extend_high_i8x16 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_extend_high_i8x16(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmovl_s8(vget_high_s8(a_.neon_i8));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepi8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each high byte into a 16-bit lane, then shift right
       * arithmetically by 8 so the upper byte becomes the sign extension. */
      r_.sse_m128i = _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Same duplicate-then-arithmetic-shift approach as the SSE2 path. */
      r_.altivec_i16 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergel(a_.altivec_i8, a_.altivec_i8)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8))
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the high eight lanes into a 64-bit vector and widen it in one step. */
      const int8_t high_half SIMDE_VECTOR(8) = {
        a_.i8[ 8], a_.i8[ 9], a_.i8[10], a_.i8[11],
        a_.i8[12], a_.i8[13], a_.i8[14], a_.i8[15]
      };

      SIMDE_CONVERT_VECTOR_(r_.i16, high_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        r_.i16[lane] = HEDLEY_STATIC_CAST(int16_t, a_.i8[lane + 8]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_extend_high_i8x16(a) simde_wasm_i16x8_extend_high_i8x16((a))
#endif
/* Sign-extend i16 lanes 4..7 of `a` into the four i32 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_extend_high_i16x8 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_extend_high_i16x8(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmovl_s16(vget_high_s16(a_.neon_i16));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepi16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each high 16-bit lane into a 32-bit lane, then shift right
       * arithmetically by 16 to produce the sign extension. */
      r_.sse_m128i = _mm_srai_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Same duplicate-then-arithmetic-shift approach as the SSE2 path. */
      r_.altivec_i32 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergel(a_.altivec_i16, a_.altivec_i16)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the high four lanes into a 64-bit vector and widen it in one step. */
      const int16_t high_half SIMDE_VECTOR(8) = { a_.i16[4], a_.i16[5], a_.i16[6], a_.i16[7] };

      SIMDE_CONVERT_VECTOR_(r_.i32, high_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
        r_.i32[lane] = HEDLEY_STATIC_CAST(int32_t, a_.i16[lane + 4]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extend_high_i16x8(a) simde_wasm_i32x4_extend_high_i16x8((a))
#endif
/* Sign-extend i32 lanes 2..3 of `a` into the two i64 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_extend_high_i32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_extend_high_i32x4(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vmovl_s32(vget_high_s32(a_.neon_i32));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* (0 > a) yields an all-ones word for negative lanes and zero otherwise;
       * interleaving it after each value supplies the upper 32 bits. */
      r_.sse_m128i = _mm_unpackhi_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Duplicate each high word into a 64-bit lane, then shift right
       * arithmetically by 32. */
      r_.altivec_i64 =
        vec_sra(
          HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergel(a_.altivec_i32, a_.altivec_i32)),
          vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32))
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each value with its (0 > a) sign mask.
       * NOTE(review): this places the value word first in each 64-bit lane;
       * confirm the word order is right for big-endian targets. */
      r_.altivec_i32 =
        vec_mergel(
          a_.altivec_i32,
          HEDLEY_REINTERPRET_CAST(
            SIMDE_POWER_ALTIVEC_VECTOR(int),
            vec_cmpgt(vec_splat_s32(0), a_.altivec_i32)
          )
        );
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* Copy the high two lanes into a 64-bit vector and widen it in one step. */
      const int32_t high_half SIMDE_VECTOR(8) = { a_.i32[2], a_.i32[3] };

      SIMDE_CONVERT_VECTOR_(r_.i64, high_half);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
        r_.i64[lane] = HEDLEY_STATIC_CAST(int64_t, a_.i32[lane + 2]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_extend_high_i32x4(a) simde_wasm_i64x2_extend_high_i32x4((a))
#endif
/* Zero-extend u8 lanes 8..15 of `a` into the eight u16 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_extend_high_u8x16 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_extend_high_u8x16(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vmovl_u8(vget_high_u8(a_.neon_u8));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepu8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each high byte into a 16-bit lane, then shift right
       * logically by 8 so the upper byte becomes zero. */
      r_.sse_m128i = _mm_srli_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each high byte with a zero byte.
       * NOTE(review): this places the value byte first in each 16-bit lane;
       * confirm the byte order is right for big-endian targets. */
      r_.altivec_i8 = vec_mergel(a_.altivec_i8, vec_splat_s8(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint8_t high_half SIMDE_VECTOR(8) = {
        a_.u8[ 8], a_.u8[ 9], a_.u8[10], a_.u8[11],
        a_.u8[12], a_.u8[13], a_.u8[14], a_.u8[15]
      };

      SIMDE_CONVERT_VECTOR_(r_.u16, high_half);
    #else
      /* Store through the unsigned view, matching the convert-vector branch
       * and the uint16_t cast. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; lane++) {
        r_.u16[lane] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[lane + 8]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extend_high_u8x16(a) simde_wasm_u16x8_extend_high_u8x16((a))
#endif
/* Zero-extend u16 lanes 4..7 of `a` into the four u32 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_extend_high_u16x8 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_extend_high_u16x8(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmovl_u16(vget_high_u16(a_.neon_u16));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepu16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Duplicate each high 16-bit lane into a 32-bit lane, then shift right
       * logically by 16 so the upper half becomes zero. */
      r_.sse_m128i = _mm_srli_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each high halfword with a zero halfword.
       * NOTE(review): this places the value halfword first in each 32-bit
       * lane; confirm the order is right for big-endian targets. */
      r_.altivec_i16 = vec_mergel(a_.altivec_i16, vec_splat_s16(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint16_t high_half SIMDE_VECTOR(8) = { a_.u16[4], a_.u16[5], a_.u16[6], a_.u16[7] };

      SIMDE_CONVERT_VECTOR_(r_.u32, high_half);
    #else
      /* Store through the unsigned view, matching the convert-vector branch
       * and the uint32_t cast (avoids an implicit unsigned-to-signed
       * conversion). */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; lane++) {
        r_.u32[lane] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[lane + 4]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_extend_high_u16x8(a) simde_wasm_u32x4_extend_high_u16x8((a))
#endif
/* Zero-extend u32 lanes 2..3 of `a` into the two u64 lanes of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_extend_high_u32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_extend_high_u32x4(a);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u64 = vmovl_u32(vget_high_u32(a_.neon_u32));
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Move the upper 64 bits into the lower half, then widen from there. */
      r_.sse_m128i = _mm_cvtepu32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2)));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave each high word with a zero word to form the upper 32 bits. */
      r_.sse_m128i = _mm_unpackhi_epi32(a_.sse_m128i, _mm_setzero_si128());
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Interleave each high word with a zero word.
       * NOTE(review): this places the value word first in each 64-bit lane;
       * confirm the order is right for big-endian targets. */
      r_.altivec_i32 = vec_mergel(a_.altivec_i32, vec_splat_s32(0));
    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      const uint32_t high_half SIMDE_VECTOR(8) = { a_.u32[2], a_.u32[3] };

      SIMDE_CONVERT_VECTOR_(r_.u64, high_half);
    #else
      /* Widen explicitly to uint64_t and store through the unsigned view; the
       * previous uint32_t cast was a no-op and the store went through i64. */
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; lane++) {
        r_.u64[lane] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[lane + 2]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_extend_high_u32x4(a) simde_wasm_u64x2_extend_high_u32x4((a))
#endif
/* extmul_low */
/* Widening multiply: for i8 lanes 0..7, sign-extend the lanes of `a` and `b`
 * to i16 and store their product in the matching i16 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_extmul_low_i8x16 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_extmul_low_i8x16(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmull_s8(vget_low_s8(a_.neon_i8), vget_low_s8(b_.neon_i8));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each low byte into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(signed char) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(signed char) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_i8, a_.altivec_i8);
        bshuf = vec_mergeh(b_.altivec_i8, b_.altivec_i8);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
        };
        ashuf = vec_perm(a_.altivec_i8, a_.altivec_i8, perm);
        bshuf = vec_perm(b_.altivec_i8, b_.altivec_i8, perm);
      #endif

      r_.altivec_i16 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Sign-extend both low halves to 16 bits, then keep the low 16 bits of
       * each product. */
      r_.sse_m128i =
        _mm_mullo_epi16(
          _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8),
          _mm_srai_epi16(_mm_unpacklo_epi8(b_.sse_m128i, b_.sse_m128i), 8)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low eight lanes of each operand, widen, and multiply. */
      r_.i16 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i8, a_.i8, 0, 1, 2, 3, 4, 5, 6, 7),
          __typeof__(r_.i16)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i8, b_.i8, 0, 1, 2, 3, 4, 5, 6, 7),
          __typeof__(r_.i16)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        const int16_t lhs = HEDLEY_STATIC_CAST(int16_t, a_.i8[lane]);
        const int16_t rhs = HEDLEY_STATIC_CAST(int16_t, b_.i8[lane]);
        r_.i16[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_extmul_low_i8x16(a, b) simde_wasm_i16x8_extmul_low_i8x16((a), (b))
#endif
/* Widening multiply: for i16 lanes 0..3, sign-extend the lanes of `a` and `b`
 * to i32 and store their product in the matching i32 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_extmul_low_i16x8 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_extmul_low_i16x8(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each low halfword into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(signed short) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(signed short) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_i16, a_.altivec_i16);
        bshuf = vec_mergeh(b_.altivec_i16, b_.altivec_i16);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 1, 0, 1,
          2, 3, 2, 3,
          4, 5, 4, 5,
          6, 7, 6, 7
        };
        ashuf = vec_perm(a_.altivec_i16, a_.altivec_i16, perm);
        bshuf = vec_perm(b_.altivec_i16, b_.altivec_i16, perm);
      #endif

      r_.altivec_i32 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave the low and high 16-bit halves of each signed product to
       * rebuild the full 32-bit results for the low four lanes. */
      r_.sse_m128i =
        _mm_unpacklo_epi16(
          _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i),
          _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low four lanes of each operand, widen, and multiply. */
      r_.i32 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i16, a_.i16, 0, 1, 2, 3),
          __typeof__(r_.i32)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i16, b_.i16, 0, 1, 2, 3),
          __typeof__(r_.i32)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
        const int32_t lhs = HEDLEY_STATIC_CAST(int32_t, a_.i16[lane]);
        const int32_t rhs = HEDLEY_STATIC_CAST(int32_t, b_.i16[lane]);
        r_.i32[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extmul_low_i16x8(a, b) simde_wasm_i32x4_extmul_low_i16x8((a), (b))
#endif
/* Widening multiply: for i32 lanes 0..1, sign-extend the lanes of `a` and `b`
 * to i64 and store their product in the matching i64 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_extmul_low_i32x4 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_extmul_low_i32x4(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vmull_s32(vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Duplicate each low word into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(signed int) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_i32, a_.altivec_i32);
        bshuf = vec_mergeh(b_.altivec_i32, b_.altivec_i32);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 1, 2, 3, 0, 1, 2, 3,
          4, 5, 6, 7, 4, 5, 6, 7
        };
        ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, perm);
        bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm);
      #endif

      r_.altivec_i64 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* _mm_mul_epi32 multiplies 32-bit elements 0 and 2; the shuffle puts
       * source lanes 0 and 1 into those positions. */
      r_.sse_m128i =
        _mm_mul_epi32(
          _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)),
          _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0))
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low two lanes of each operand, widen, and multiply. */
      r_.i64 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i32, a_.i32, 0, 1),
          __typeof__(r_.i64)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i32, b_.i32, 0, 1),
          __typeof__(r_.i64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
        const int64_t lhs = HEDLEY_STATIC_CAST(int64_t, a_.i32[lane]);
        const int64_t rhs = HEDLEY_STATIC_CAST(int64_t, b_.i32[lane]);
        r_.i64[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_extmul_low_i32x4(a, b) simde_wasm_i64x2_extmul_low_i32x4((a), (b))
#endif
/* Widening multiply: for u8 lanes 0..7, zero-extend the lanes of `a` and `b`
 * to u16 and store their product in the matching u16 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_extmul_low_u8x16 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_extmul_low_u8x16(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vmull_u8(vget_low_u8(a_.neon_u8), vget_low_u8(b_.neon_u8));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each low byte into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_u8, a_.altivec_u8);
        bshuf = vec_mergeh(b_.altivec_u8, b_.altivec_u8);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
        };
        ashuf = vec_perm(a_.altivec_u8, a_.altivec_u8, perm);
        bshuf = vec_perm(b_.altivec_u8, b_.altivec_u8, perm);
      #endif

      r_.altivec_u16 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low eight lanes of each operand, widen, and multiply. */
      r_.u16 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.u8, a_.u8, 0, 1, 2, 3, 4, 5, 6, 7),
          __typeof__(r_.u16)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.u8, b_.u8, 0, 1, 2, 3, 4, 5, 6, 7),
          __typeof__(r_.u16)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; lane++) {
        const uint16_t lhs = HEDLEY_STATIC_CAST(uint16_t, a_.u8[lane]);
        const uint16_t rhs = HEDLEY_STATIC_CAST(uint16_t, b_.u8[lane]);
        r_.u16[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extmul_low_u8x16(a, b) simde_wasm_u16x8_extmul_low_u8x16((a), (b))
#endif
/* Widening multiply: for u16 lanes 0..3, zero-extend the lanes of `a` and `b`
 * to u32 and store their product in the matching u32 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_extmul_low_u16x8 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_extmul_low_u16x8(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmull_u16(vget_low_u16(a_.neon_u16), vget_low_u16(b_.neon_u16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each low halfword into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_u16, a_.altivec_u16);
        bshuf = vec_mergeh(b_.altivec_u16, b_.altivec_u16);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 1, 0, 1,
          2, 3, 2, 3,
          4, 5, 4, 5,
          6, 7, 6, 7
        };
        ashuf = vec_perm(a_.altivec_u16, a_.altivec_u16, perm);
        bshuf = vec_perm(b_.altivec_u16, b_.altivec_u16, perm);
      #endif

      r_.altivec_u32 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave the low and high 16-bit halves of each unsigned product to
       * rebuild the full 32-bit results for the low four lanes. */
      r_.sse_m128i =
        _mm_unpacklo_epi16(
          _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i),
          _mm_mulhi_epu16(a_.sse_m128i, b_.sse_m128i)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low four lanes of each operand, widen, and multiply. */
      r_.u32 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.u16, a_.u16, 0, 1, 2, 3),
          __typeof__(r_.u32)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.u16, b_.u16, 0, 1, 2, 3),
          __typeof__(r_.u32)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; lane++) {
        const uint32_t lhs = HEDLEY_STATIC_CAST(uint32_t, a_.u16[lane]);
        const uint32_t rhs = HEDLEY_STATIC_CAST(uint32_t, b_.u16[lane]);
        r_.u32[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_extmul_low_u16x8(a, b) simde_wasm_u32x4_extmul_low_u16x8((a), (b))
#endif
/* Widening multiply: for u32 lanes 0..1, zero-extend the lanes of `a` and `b`
 * to u64 and store their product in the matching u64 lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_extmul_low_u32x4 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_extmul_low_u32x4(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u64 = vmull_u32(vget_low_u32(a_.neon_u32), vget_low_u32(b_.neon_u32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Duplicate each low word into adjacent slots so that vec_mule sees
       * every low lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergeh(a_.altivec_u32, a_.altivec_u32);
        bshuf = vec_mergeh(b_.altivec_u32, b_.altivec_u32);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
          0, 1, 2, 3, 0, 1, 2, 3,
          4, 5, 6, 7, 4, 5, 6, 7
        };
        ashuf = vec_perm(a_.altivec_u32, a_.altivec_u32, perm);
        bshuf = vec_perm(b_.altivec_u32, b_.altivec_u32, perm);
      #endif

      r_.altivec_u64 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* _mm_mul_epu32 multiplies 32-bit elements 0 and 2; the shuffle puts
       * source lanes 0 and 1 into those positions. */
      r_.sse_m128i =
        _mm_mul_epu32(
          _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)),
          _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0))
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the low two lanes of each operand, widen, and multiply. */
      r_.u64 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.u32, a_.u32, 0, 1),
          __typeof__(r_.u64)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.u32, b_.u32, 0, 1),
          __typeof__(r_.u64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; lane++) {
        const uint64_t lhs = HEDLEY_STATIC_CAST(uint64_t, a_.u32[lane]);
        const uint64_t rhs = HEDLEY_STATIC_CAST(uint64_t, b_.u32[lane]);
        r_.u64[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_extmul_low_u32x4(a, b) simde_wasm_u64x2_extmul_low_u32x4((a), (b))
#endif
/* extmul_high */
/* Widening multiply: for i8 lanes 8..15, sign-extend the lanes of `a` and `b`
 * to i16 and store their products in i16 lanes 0..7 of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_extmul_high_i8x16 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_extmul_high_i8x16(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i16 = vmull_high_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmull_s8(vget_high_s8(a_.neon_i8), vget_high_s8(b_.neon_i8));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each high byte into adjacent slots before vec_mule. */
      r_.altivec_i16 =
        vec_mule(
          vec_mergel(a_.altivec_i8, a_.altivec_i8),
          vec_mergel(b_.altivec_i8, b_.altivec_i8)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Sign-extend both high halves to 16 bits, then keep the low 16 bits of
       * each product. */
      r_.sse_m128i =
        _mm_mullo_epi16(
          _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8),
          _mm_srai_epi16(_mm_unpackhi_epi8(b_.sse_m128i, b_.sse_m128i), 8)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the high eight lanes of each operand, widen, and multiply. */
      r_.i16 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i8, a_.i8, 8, 9, 10, 11, 12, 13, 14, 15),
          __typeof__(r_.i16)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i8, b_.i8, 8, 9, 10, 11, 12, 13, 14, 15),
          __typeof__(r_.i16)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
        const int16_t lhs = HEDLEY_STATIC_CAST(int16_t, a_.i8[lane + 8]);
        const int16_t rhs = HEDLEY_STATIC_CAST(int16_t, b_.i8[lane + 8]);
        r_.i16[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_extmul_high_i8x16(a, b) simde_wasm_i16x8_extmul_high_i8x16((a), (b))
#endif
/* Widening multiply: for i16 lanes 4..7, sign-extend the lanes of `a` and `b`
 * to i32 and store their products in i32 lanes 0..3 of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_extmul_high_i16x8 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_extmul_high_i16x8(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i32 = vmull_high_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Duplicate each high halfword into adjacent slots before vec_mule. */
      r_.altivec_i32 =
        vec_mule(
          vec_mergel(a_.altivec_i16, a_.altivec_i16),
          vec_mergel(b_.altivec_i16, b_.altivec_i16)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave the low and high 16-bit halves of each signed product to
       * rebuild the full 32-bit results for the high four lanes. */
      r_.sse_m128i =
        _mm_unpackhi_epi16(
          _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i),
          _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the high four lanes of each operand, widen, and multiply. */
      r_.i32 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i16, a_.i16, 4, 5, 6, 7),
          __typeof__(r_.i32)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i16, b_.i16, 4, 5, 6, 7),
          __typeof__(r_.i32)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
        const int32_t lhs = HEDLEY_STATIC_CAST(int32_t, a_.i16[lane + 4]);
        const int32_t rhs = HEDLEY_STATIC_CAST(int32_t, b_.i16[lane + 4]);
        r_.i32[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extmul_high_i16x8(a, b) simde_wasm_i32x4_extmul_high_i16x8((a), (b))
#endif
/* Widening multiply: for i32 lanes 2..3, sign-extend the lanes of `a` and `b`
 * to i64 and store their products in i64 lanes 0..1 of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_extmul_high_i32x4 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_extmul_high_i32x4(a, b);
  #else
    simde_v128_private a_ = simde_v128_to_private(a);
    simde_v128_private b_ = simde_v128_to_private(b);
    simde_v128_private r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i64 = vmull_high_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vmull_s32(vget_high_s32(a_.neon_i32), vget_high_s32(b_.neon_i32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Duplicate each high word into adjacent slots so that vec_mule sees
       * every high lane exactly once. */
      SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf;
      SIMDE_POWER_ALTIVEC_VECTOR(signed int) bshuf;

      #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
        ashuf = vec_mergel(a_.altivec_i32, a_.altivec_i32);
        bshuf = vec_mergel(b_.altivec_i32, b_.altivec_i32);
      #else
        SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
           8,  9, 10, 11,  8,  9, 10, 11,
          12, 13, 14, 15, 12, 13, 14, 15
        };
        ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, perm);
        bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm);
      #endif

      r_.altivec_i64 = vec_mule(ashuf, bshuf);
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* _mm_mul_epi32 multiplies 32-bit elements 0 and 2; the shuffle puts
       * source lanes 2 and 3 into those positions. */
      r_.sse_m128i =
        _mm_mul_epi32(
          _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)),
          _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2))
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Extract the high two lanes of each operand, widen, and multiply. */
      r_.i64 =
        __builtin_convertvector(
          __builtin_shufflevector(a_.i32, a_.i32, 2, 3),
          __typeof__(r_.i64)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(b_.i32, b_.i32, 2, 3),
          __typeof__(r_.i64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
        const int64_t lhs = HEDLEY_STATIC_CAST(int64_t, a_.i32[lane + 2]);
        const int64_t rhs = HEDLEY_STATIC_CAST(int64_t, b_.i32[lane + 2]);
        r_.i64[lane] = lhs * rhs;
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_extmul_high_i32x4(a, b) simde_wasm_i64x2_extmul_high_i32x4((a), (b))
#endif
/* Widening multiply of the upper eight unsigned 8-bit lanes.
 *
 * Result lane i (i = 0..7) holds
 *   (uint16_t) a.u8[i + 8] * (uint16_t) b.u8[i + 8]
 * which is what the portable loop at the bottom computes; the preceding
 * branches are target-specific equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_extmul_high_u8x16 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_extmul_high_u8x16(a, b);
  #else
    simde_v128_private lhs_ = simde_v128_to_private(a);
    simde_v128_private rhs_ = simde_v128_to_private(b);
    simde_v128_private prod_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      prod_.neon_u16 = vmull_high_u8(lhs_.neon_u8, rhs_.neon_u8);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      prod_.neon_u16 = vmull_u8(vget_high_u8(lhs_.neon_u8), vget_high_u8(rhs_.neon_u8));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      prod_.altivec_u16 =
        vec_mule(
          vec_mergel(lhs_.altivec_u8, lhs_.altivec_u8),
          vec_mergel(rhs_.altivec_u8, rhs_.altivec_u8)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Pick lanes 8..15, widen them to 16 bits, multiply lane-wise. */
      prod_.u16 =
        __builtin_convertvector(
          __builtin_shufflevector(lhs_.u8, lhs_.u8, 8, 9, 10, 11, 12, 13, 14, 15),
          __typeof__(prod_.u16)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(rhs_.u8, rhs_.u8, 8, 9, 10, 11, 12, 13, 14, 15),
          __typeof__(prod_.u16)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(prod_.u16) / sizeof(prod_.u16[0])) ; lane++) {
        prod_.u16[lane] = HEDLEY_STATIC_CAST(uint16_t, lhs_.u8[lane + 8]) * HEDLEY_STATIC_CAST(uint16_t, rhs_.u8[lane + 8]);
      }
    #endif

    return simde_v128_from_private(prod_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extmul_high_u8x16(a, b) simde_wasm_u16x8_extmul_high_u8x16((a), (b))
#endif
/* Widening multiply of the upper four unsigned 16-bit lanes.
 *
 * Result lane i (i = 0..3) holds
 *   (uint32_t) a.u16[i + 4] * (uint32_t) b.u16[i + 4]
 * as written in the portable loop; the other branches are target-specific
 * equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_extmul_high_u16x8 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_extmul_high_u16x8(a, b);
  #else
    simde_v128_private lhs_ = simde_v128_to_private(a);
    simde_v128_private rhs_ = simde_v128_to_private(b);
    simde_v128_private prod_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      prod_.neon_u32 = vmull_high_u16(lhs_.neon_u16, rhs_.neon_u16);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      prod_.neon_u32 = vmull_u16(vget_high_u16(lhs_.neon_u16), vget_high_u16(rhs_.neon_u16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      prod_.altivec_u32 =
        vec_mule(
          vec_mergel(lhs_.altivec_u16, lhs_.altivec_u16),
          vec_mergel(rhs_.altivec_u16, rhs_.altivec_u16)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Interleave the low and high 16-bit halves of each product, taking
       * the upper four lanes. */
      prod_.sse_m128i =
        _mm_unpackhi_epi16(
          _mm_mullo_epi16(lhs_.sse_m128i, rhs_.sse_m128i),
          _mm_mulhi_epu16(lhs_.sse_m128i, rhs_.sse_m128i)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      prod_.u32 =
        __builtin_convertvector(
          __builtin_shufflevector(lhs_.u16, lhs_.u16, 4, 5, 6, 7),
          __typeof__(prod_.u32)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(rhs_.u16, rhs_.u16, 4, 5, 6, 7),
          __typeof__(prod_.u32)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(prod_.u32) / sizeof(prod_.u32[0])) ; lane++) {
        prod_.u32[lane] = HEDLEY_STATIC_CAST(uint32_t, lhs_.u16[lane + 4]) * HEDLEY_STATIC_CAST(uint32_t, rhs_.u16[lane + 4]);
      }
    #endif

    return simde_v128_from_private(prod_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_extmul_high_u16x8(a, b) simde_wasm_u32x4_extmul_high_u16x8((a), (b))
#endif
/* Widening multiply of the upper two unsigned 32-bit lanes.
 *
 * Result lane i (i = 0, 1) holds
 *   (uint64_t) a.u32[i + 2] * (uint64_t) b.u32[i + 2]
 * as written in the portable loop; the other branches are target-specific
 * equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_extmul_high_u32x4 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_extmul_high_u32x4(a, b);
  #else
    simde_v128_private lhs_ = simde_v128_to_private(a);
    simde_v128_private rhs_ = simde_v128_to_private(b);
    simde_v128_private prod_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      prod_.neon_u64 = vmull_high_u32(lhs_.neon_u32, rhs_.neon_u32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      prod_.neon_u64 = vmull_u32(vget_high_u32(lhs_.neon_u32), vget_high_u32(rhs_.neon_u32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      prod_.altivec_u64 =
        vec_mule(
          vec_mergel(lhs_.altivec_u32, lhs_.altivec_u32),
          vec_mergel(rhs_.altivec_u32, rhs_.altivec_u32)
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Shuffle to { x[2], x[2], x[3], x[3] } so the multiply reads x[2] and
       * x[3] from lane positions 0 and 2. */
      prod_.sse_m128i =
        _mm_mul_epu32(
          _mm_shuffle_epi32(lhs_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)),
          _mm_shuffle_epi32(rhs_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2))
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      prod_.u64 =
        __builtin_convertvector(
          __builtin_shufflevector(lhs_.u32, lhs_.u32, 2, 3),
          __typeof__(prod_.u64)
        )
        *
        __builtin_convertvector(
          __builtin_shufflevector(rhs_.u32, rhs_.u32, 2, 3),
          __typeof__(prod_.u64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(prod_.u64) / sizeof(prod_.u64[0])) ; lane++) {
        prod_.u64[lane] = HEDLEY_STATIC_CAST(uint64_t, lhs_.u32[lane + 2]) * HEDLEY_STATIC_CAST(uint64_t, rhs_.u32[lane + 2]);
      }
    #endif

    return simde_v128_from_private(prod_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_extmul_high_u32x4(a, b) simde_wasm_u64x2_extmul_high_u32x4((a), (b))
#endif
/* extadd_pairwise */
/* Pairwise widening add of signed 8-bit lanes.
 *
 * Result lane i (i = 0..7) is
 *   (int16_t) a.i8[2 * i] + (int16_t) a.i8[2 * i + 1]
 * as written in the portable loop; the other branches are target-specific
 * alternatives selected at compile time. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_extadd_pairwise_i8x16 (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i16x8_extadd_pairwise_i8x16(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vpaddlq_s8(a_.neon_i8);
#elif defined(SIMDE_X86_XOP_NATIVE)
r_.sse_m128i = _mm_haddw_epi8(a_.sse_m128i);
#elif defined(SIMDE_X86_SSSE3_NATIVE)
/* _mm_maddubs_epi16 reads its first operand as unsigned bytes and its second
 * as signed bytes, so the all-ones constant goes first and `a` second to
 * keep the input signed. */
r_.sse_m128i = _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a_.sse_m128i);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
/* Multiply even and odd elements by one to widen them, then add. */
SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1);
r_.altivec_i16 =
vec_add(
vec_mule(a_.altivec_i8, one),
vec_mulo(a_.altivec_i8, one)
);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
/* Viewed as 16-bit lanes: (x << 8) >> 8 isolates the low byte and x >> 8
 * the high byte.  Presumably the right shifts are arithmetic on these signed
 * lanes, which is what makes both halves sign-extended. */
r_.i16 =
((a_.i16 << 8) >> 8) +
((a_.i16 >> 8) );
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]);
}
#endif
return simde_v128_from_private(r_);
#endif
}
/* With native aliases enabled, the unprefixed name forwards to the portable
 * simde_ implementation. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i16x8_extadd_pairwise_i8x16(a) simde_wasm_i16x8_extadd_pairwise_i8x16((a))
#endif
/* Pairwise widening add of signed 16-bit lanes.
 *
 * Result lane i (i = 0..3) is
 *   (int32_t) a.i16[2 * i] + (int32_t) a.i16[2 * i + 1]
 * as written in the portable loop; the other branches are target-specific
 * alternatives. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_extadd_pairwise_i16x8 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_extadd_pairwise_i16x8(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private sum_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      sum_.neon_i32 = vpaddlq_s16(src_.neon_i16);
    #elif defined(SIMDE_X86_XOP_NATIVE)
      sum_.sse_m128i = _mm_haddd_epi16(src_.sse_m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Multiply every 16-bit lane by one; madd then sums adjacent products. */
      sum_.sse_m128i = _mm_madd_epi16(src_.sse_m128i, _mm_set1_epi16(INT8_C(1)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(signed short) ones = vec_splat_s16(1);
      sum_.altivec_i32 =
        vec_add(
          vec_mule(src_.altivec_i16, ones),
          vec_mulo(src_.altivec_i16, ones)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Low half via shift-left/shift-right, high half via shift-right. */
      sum_.i32 =
        ((src_.i32 << 16) >> 16) +
        ((src_.i32 >> 16)      );
    #else
      SIMDE_VECTORIZE
      for (size_t pair = 0 ; pair < (sizeof(sum_.i32) / sizeof(sum_.i32[0])) ; pair++) {
        sum_.i32[pair] = HEDLEY_STATIC_CAST(int32_t, src_.i16[(pair * 2)]) + HEDLEY_STATIC_CAST(int32_t, src_.i16[(pair * 2) + 1]);
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_extadd_pairwise_i16x8(a) simde_wasm_i32x4_extadd_pairwise_i16x8((a))
#endif
/* Pairwise widening add of unsigned 8-bit lanes.
 *
 * Result lane i (i = 0..7) is
 *   (uint16_t) a.u8[2 * i] + (uint16_t) a.u8[2 * i + 1]
 * as written in the portable loop; the other branches are target-specific
 * alternatives. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_extadd_pairwise_u8x16 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_extadd_pairwise_u8x16(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private sum_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      sum_.neon_u16 = vpaddlq_u8(src_.neon_u8);
    #elif defined(SIMDE_X86_XOP_NATIVE)
      sum_.sse_m128i = _mm_haddw_epu8(src_.sse_m128i);
    #elif defined(SIMDE_X86_SSSE3_NATIVE)
      /* `a` is passed as the first (unsigned-byte) operand of maddubs, the
       * constant one as the second. */
      sum_.sse_m128i = _mm_maddubs_epi16(src_.sse_m128i, _mm_set1_epi8(INT8_C(1)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splat_u8(1);
      sum_.altivec_u16 =
        vec_add(
          vec_mule(src_.altivec_u8, ones),
          vec_mulo(src_.altivec_u8, ones)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Low byte via shift-left/shift-right, high byte via shift-right. */
      sum_.u16 =
        ((src_.u16 << 8) >> 8) +
        ((src_.u16 >> 8)     );
    #else
      SIMDE_VECTORIZE
      for (size_t pair = 0 ; pair < (sizeof(sum_.u16) / sizeof(sum_.u16[0])) ; pair++) {
        sum_.u16[pair] = HEDLEY_STATIC_CAST(uint16_t, src_.u8[(pair * 2)]) + HEDLEY_STATIC_CAST(uint16_t, src_.u8[(pair * 2) + 1]);
      }
    #endif

    return simde_v128_from_private(sum_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_extadd_pairwise_u8x16(a) simde_wasm_u16x8_extadd_pairwise_u8x16((a))
#endif
/* Pairwise widening add of unsigned 16-bit lanes.
 *
 * Result lane i (i = 0..3) is
 *   (uint32_t) a.u16[2 * i] + (uint32_t) a.u16[2 * i + 1]
 * as written in the portable loop; the other branches are target-specific
 * alternatives selected at compile time. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_extadd_pairwise_u16x8 (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_u32x4_extadd_pairwise_u16x8(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u32 = vpaddlq_u16(a_.neon_u16);
#elif defined(SIMDE_X86_XOP_NATIVE)
r_.sse_m128i = _mm_haddd_epu16(a_.sse_m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* Per 32-bit lane: the logical shift yields the upper 16 bits, the mask
 * yields the lower 16 bits; their sum is the pairwise total. */
r_.sse_m128i =
_mm_add_epi32(
_mm_srli_epi32(a_.sse_m128i, 16),
_mm_and_si128(a_.sse_m128i, _mm_set1_epi32(INT32_C(0x0000ffff)))
);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
/* Multiply even and odd elements by one to widen them, then add. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1);
r_.altivec_u32 =
vec_add(
vec_mule(a_.altivec_u16, one),
vec_mulo(a_.altivec_u16, one)
);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
/* Viewed as 32-bit lanes: (x << 16) >> 16 keeps the low half, x >> 16 the
 * high half. */
r_.u32 =
((a_.u32 << 16) >> 16) +
((a_.u32 >> 16) );
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]);
}
#endif
return simde_v128_from_private(r_);
#endif
}
/* With native aliases enabled, the unprefixed name forwards to the portable
 * simde_ implementation. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_u32x4_extadd_pairwise_u16x8(a) simde_wasm_u32x4_extadd_pairwise_u16x8((a))
#endif
/* X_load_Y */
/* Load-and-extend: copies eight int8_t values from `mem` (via simde_memcpy)
 * and converts each one to int16_t, filling the eight 16-bit result lanes in
 * order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i16x8_load8x8 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_load8x8(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* 8-byte vector temporary, widened in a single conversion. */
      int8_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.i16, src);
    #else
      SIMDE_ALIGN_TO_16 int8_t src[8];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.i16) / sizeof(wide_.i16[0])) ; lane++) {
        wide_.i16[lane] = HEDLEY_STATIC_CAST(int16_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i16x8_load8x8(mem) simde_wasm_i16x8_load8x8((mem))
#endif
/* Load-and-extend: copies four int16_t values from `mem` (via simde_memcpy)
 * and converts each one to int32_t, filling the four 32-bit result lanes in
 * order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_load16x4 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_load16x4(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* 8-byte vector temporary, widened in a single conversion. */
      int16_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.i32, src);
    #else
      SIMDE_ALIGN_TO_16 int16_t src[4];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.i32) / sizeof(wide_.i32[0])) ; lane++) {
        wide_.i32[lane] = HEDLEY_STATIC_CAST(int32_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_load16x4(mem) simde_wasm_i32x4_load16x4((mem))
#endif
/* Load-and-extend: copies two int32_t values from `mem` (via simde_memcpy)
 * and converts each one to int64_t, filling the two 64-bit result lanes in
 * order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i64x2_load32x2 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_load32x2(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) && !defined(SIMDE_BUG_CLANG_50893)
      /* 8-byte vector temporary, widened in a single conversion. */
      int32_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.i64, src);
    #else
      SIMDE_ALIGN_TO_16 int32_t src[2];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.i64) / sizeof(wide_.i64[0])) ; lane++) {
        wide_.i64[lane] = HEDLEY_STATIC_CAST(int64_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i64x2_load32x2(mem) simde_wasm_i64x2_load32x2((mem))
#endif
/* Load-and-extend: copies eight uint8_t values from `mem` (via simde_memcpy)
 * and converts each one to uint16_t, filling the eight 16-bit result lanes
 * in order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u16x8_load8x8 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_load8x8(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* 8-byte vector temporary, widened in a single conversion. */
      uint8_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.u16, src);
    #else
      SIMDE_ALIGN_TO_16 uint8_t src[8];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.u16) / sizeof(wide_.u16[0])) ; lane++) {
        wide_.u16[lane] = HEDLEY_STATIC_CAST(uint16_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u16x8_load8x8(mem) simde_wasm_u16x8_load8x8((mem))
#endif
/* Load-and-extend: copies four uint16_t values from `mem` (via simde_memcpy)
 * and converts each one to uint32_t, filling the four 32-bit result lanes in
 * order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_load16x4 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_load16x4(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* 8-byte vector temporary, widened in a single conversion. */
      uint16_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.u32, src);
    #else
      SIMDE_ALIGN_TO_16 uint16_t src[4];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.u32) / sizeof(wide_.u32[0])) ; lane++) {
        wide_.u32[lane] = HEDLEY_STATIC_CAST(uint32_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_load16x4(mem) simde_wasm_u32x4_load16x4((mem))
#endif
/* Load-and-extend: copies two uint32_t values from `mem` (via simde_memcpy)
 * and converts each one to uint64_t, filling the two 64-bit result lanes in
 * order. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u64x2_load32x2 (const void * mem) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u64x2_load32x2(mem);
  #else
    simde_v128_private wide_;

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762)
      /* 8-byte vector temporary, widened in a single conversion. */
      uint32_t src SIMDE_VECTOR(8);
      simde_memcpy(&src, mem, sizeof(src));
      SIMDE_CONVERT_VECTOR_(wide_.u64, src);
    #else
      SIMDE_ALIGN_TO_16 uint32_t src[2];
      simde_memcpy(src, mem, sizeof(src));

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(wide_.u64) / sizeof(wide_.u64[0])) ; lane++) {
        wide_.u64[lane] = HEDLEY_STATIC_CAST(uint64_t, src[lane]);
      }
    #endif

    return simde_v128_from_private(wide_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u64x2_load32x2(mem) simde_wasm_u64x2_load32x2((mem))
#endif
/* load*_zero */
/* Copies one 32-bit value from `a` (via simde_memcpy) into lane 0 of the
 * result and sets the remaining three 32-bit lanes to zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load32_zero (const void * a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load32_zero(a);
  #else
    int32_t scalar;
    simde_v128_private out_;

    simde_memcpy(&scalar, a, sizeof(scalar));

    #if defined(SIMDE_X86_SSE2_NATIVE)
      out_.sse_m128i = _mm_cvtsi32_si128(scalar);
    #else
      out_.i32[0] = scalar;
      for (size_t lane = 1 ; lane < (sizeof(out_.i32) / sizeof(out_.i32[0])) ; lane++) {
        out_.i32[lane] = 0;
      }
    #endif

    return simde_v128_from_private(out_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load32_zero(a) simde_wasm_v128_load32_zero((a))
#endif
/* Copies one 64-bit value from `a` (via simde_memcpy) into lane 0 of the
 * result and sets the other 64-bit lane to zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load64_zero (const void * a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_load64_zero(a);
  #else
    int64_t scalar;
    simde_v128_private out_;

    simde_memcpy(&scalar, a, sizeof(scalar));

    #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
      out_.sse_m128i = _mm_cvtsi64_si128(scalar);
    #else
      out_.i64[1] = 0;
      out_.i64[0] = scalar;
    #endif

    return simde_v128_from_private(out_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load64_zero(a) simde_wasm_v128_load64_zero((a))
#endif
/* load*_lane */
/* Returns a copy of `vec` in which 8-bit lane `lane` (0..15, must be a
 * constant) has been replaced by the byte read from `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load8_lane (const void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
  simde_v128_private vec_ = simde_v128_to_private(vec);

  #if defined(SIMDE_BUG_CLANG_50901)
    /* Alternate path taken when SIMDE_BUG_CLANG_50901 is defined: build the
     * result with vec_insert instead of a direct lane store. */
    simde_v128_private patched_ = simde_v128_to_private(vec);
    patched_.altivec_i8 = vec_insert(*HEDLEY_REINTERPRET_CAST(const signed char *, a), vec_.altivec_i8, lane);
    return simde_v128_from_private(patched_);
  #else
    vec_.i8[lane] = *HEDLEY_REINTERPRET_CAST(const int8_t *, a);
    return simde_v128_from_private(vec_);
  #endif
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_load8_lane(a, vec, lane) wasm_v128_load8_lane(HEDLEY_CONST_CAST(int8_t *, (a)), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load8_lane(a, vec, lane) simde_wasm_v128_load8_lane((a), (vec), (lane))
#endif
/* Returns a copy of `vec` in which 16-bit lane `lane` (0..7, must be a
 * constant) has been replaced by the 16-bit value stored at `a`.
 *
 * The value is fetched with simde_memcpy rather than by dereferencing a
 * casted pointer: `a` is a plain `const void *`, so it may be unaligned for
 * int16_t and may point at an object of another type.  This matches how the
 * store*_lane and load*_zero functions in this file access memory. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load16_lane (const void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_v128_private
    a_ = simde_v128_to_private(vec);
  int16_t tmp;

  simde_memcpy(&tmp, a, sizeof(tmp));
  a_.i16[lane] = tmp;

  return simde_v128_from_private(a_);
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_load16_lane(a, vec, lane) wasm_v128_load16_lane(HEDLEY_CONST_CAST(int16_t *, (a)), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load16_lane(a, vec, lane) simde_wasm_v128_load16_lane((a), (vec), (lane))
#endif
/* Returns a copy of `vec` in which 32-bit lane `lane` (0..3, must be a
 * constant) has been replaced by the 32-bit value stored at `a`.
 *
 * The value is fetched with simde_memcpy rather than by dereferencing a
 * casted pointer: `a` is a plain `const void *`, so it may be unaligned for
 * int32_t and may point at an object of another type.  This matches how the
 * store*_lane and load*_zero functions in this file access memory. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load32_lane (const void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_v128_private
    a_ = simde_v128_to_private(vec);
  int32_t tmp;

  simde_memcpy(&tmp, a, sizeof(tmp));
  a_.i32[lane] = tmp;

  return simde_v128_from_private(a_);
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_load32_lane(a, vec, lane) wasm_v128_load32_lane(HEDLEY_CONST_CAST(int32_t *, (a)), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load32_lane(a, vec, lane) simde_wasm_v128_load32_lane((a), (vec), (lane))
#endif
/* Returns a copy of `vec` in which 64-bit lane `lane` (0..1, must be a
 * constant) has been replaced by the 64-bit value stored at `a`.
 *
 * The value is fetched with simde_memcpy rather than by dereferencing a
 * casted pointer: `a` is a plain `const void *`, so it may be unaligned for
 * int64_t and may point at an object of another type.  This matches how the
 * store*_lane and load*_zero functions in this file access memory. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_v128_load64_lane (const void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_v128_private
    a_ = simde_v128_to_private(vec);
  int64_t tmp;

  simde_memcpy(&tmp, a, sizeof(tmp));
  a_.i64[lane] = tmp;

  return simde_v128_from_private(a_);
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_load64_lane(a, vec, lane) wasm_v128_load64_lane(HEDLEY_CONST_CAST(int64_t *, (a)), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_load64_lane(a, vec, lane) simde_wasm_v128_load64_lane((a), (vec), (lane))
#endif
/* store*_lane */
/* Writes 8-bit lane `lane` (0..15, must be a constant) of `vec` to the
 * memory at `a`, using simde_memcpy for the store. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_wasm_v128_store8_lane (void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
  simde_v128_private lanes_ = simde_v128_to_private(vec);
  int8_t selected = lanes_.i8[lane];

  simde_memcpy(a, &selected, sizeof(selected));
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_store8_lane(a, vec, lane) wasm_v128_store8_lane((a), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_store8_lane(a, vec, lane) simde_wasm_v128_store8_lane((a), (vec), (lane))
#endif
/* Writes 16-bit lane `lane` (0..7, must be a constant) of `vec` to the
 * memory at `a`, using simde_memcpy for the store. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_wasm_v128_store16_lane (void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_v128_private lanes_ = simde_v128_to_private(vec);
  int16_t selected = lanes_.i16[lane];

  simde_memcpy(a, &selected, sizeof(selected));
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_store16_lane(a, vec, lane) wasm_v128_store16_lane((a), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_store16_lane(a, vec, lane) simde_wasm_v128_store16_lane((a), (vec), (lane))
#endif
/* Writes 32-bit lane `lane` (0..3, must be a constant) of `vec` to the
 * memory at `a`, using simde_memcpy for the store. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_wasm_v128_store32_lane (void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_v128_private lanes_ = simde_v128_to_private(vec);
  int32_t selected = lanes_.i32[lane];

  simde_memcpy(a, &selected, sizeof(selected));
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_store32_lane(a, vec, lane) wasm_v128_store32_lane((a), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_store32_lane(a, vec, lane) simde_wasm_v128_store32_lane((a), (vec), (lane))
#endif
/* Writes 64-bit lane `lane` (0..1, must be a constant) of `vec` to the
 * memory at `a`, using simde_memcpy for the store. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_wasm_v128_store64_lane (void * a, simde_v128_t vec, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_v128_private lanes_ = simde_v128_to_private(vec);
  int64_t selected = lanes_.i64[lane];

  simde_memcpy(a, &selected, sizeof(selected));
}
/* On native WASM the function above is shadowed by the real intrinsic. */
#if defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_wasm_v128_store64_lane(a, vec, lane) wasm_v128_store64_lane((a), (vec), (lane))
#endif
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_v128_store64_lane(a, vec, lane) simde_wasm_v128_store64_lane((a), (vec), (lane))
#endif
/* convert */
/* Converts each of the four signed 32-bit integer lanes of `a` to
 * simde_float32: result.f32[i] = (simde_float32) a.i32[i], as written in the
 * portable loop.  The other branches are target-specific equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_convert_i32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_convert_i32x4(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128 = _mm_cvtepi32_ps(a_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* The guard needs the _NATIVE suffix like every other NEON branch in
       * this file; without it this branch was never selected. */
      r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* vec_ctf can trigger clang's -Wc11-extensions; silence it locally. */
      HEDLEY_DIAGNOSTIC_PUSH
      #if HEDLEY_HAS_WARNING("-Wc11-extensions")
        #pragma clang diagnostic ignored "-Wc11-extensions"
      #endif
      r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0);
      HEDLEY_DIAGNOSTIC_POP
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_convert_i32x4(a) simde_wasm_f32x4_convert_i32x4((a))
#endif
/* Converts each of the four unsigned 32-bit integer lanes of `a` to
 * simde_float32: result.f32[i] = (simde_float32) a.u32[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_convert_u32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_convert_u32x4(a);
  #else
    simde_v128_private ints_ = simde_v128_to_private(a);
    simde_v128_private floats_;

    #if defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(floats_.f32, ints_.u32);
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(floats_.f32) / sizeof(floats_.f32[0])) ; lane++) {
        floats_.f32[lane] = HEDLEY_STATIC_CAST(simde_float32, ints_.u32[lane]);
      }
    #endif

    return simde_v128_from_private(floats_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f32x4_convert_u32x4(a) simde_wasm_f32x4_convert_u32x4((a))
#endif
/* Converts the two lowest signed 32-bit lanes of `a` to simde_float64:
 * result.f64[i] = (simde_float64) a.i32[i] for i = 0, 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_convert_low_i32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_convert_low_i32x4(a);
  #else
    simde_v128_private ints_ = simde_v128_to_private(a);
    simde_v128_private doubles_;

    #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector)
      /* Select lanes 0 and 1, then widen them to the f64 vector type. */
      doubles_.f64 =
        __builtin_convertvector(
          __builtin_shufflevector(ints_.i32, ints_.i32, 0, 1),
          __typeof__(doubles_.f64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(doubles_.f64) / sizeof(doubles_.f64[0])) ; lane++) {
        doubles_.f64[lane] = HEDLEY_STATIC_CAST(simde_float64, ints_.i32[lane]);
      }
    #endif

    return simde_v128_from_private(doubles_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_convert_low_i32x4(a) simde_wasm_f64x2_convert_low_i32x4((a))
#endif
/* Converts the two lowest unsigned 32-bit lanes of `a` to simde_float64:
 * result.f64[i] = (simde_float64) a.u32[i] for i = 0, 1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_convert_low_u32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_convert_low_u32x4(a);
  #else
    simde_v128_private ints_ = simde_v128_to_private(a);
    simde_v128_private doubles_;

    #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector)
      /* Select lanes 0 and 1, then widen them to the f64 vector type. */
      doubles_.f64 =
        __builtin_convertvector(
          __builtin_shufflevector(ints_.u32, ints_.u32, 0, 1),
          __typeof__(doubles_.f64)
        );
    #else
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < (sizeof(doubles_.f64) / sizeof(doubles_.f64[0])) ; lane++) {
        doubles_.f64[lane] = HEDLEY_STATIC_CAST(simde_float64, ints_.u32[lane]);
      }
    #endif

    return simde_v128_from_private(doubles_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_f64x2_convert_low_u32x4(a) simde_wasm_f64x2_convert_low_u32x4((a))
#endif
/* trunc_sat */
/* Saturating float -> signed 32-bit integer conversion, lane by lane.
 *
 * As spelled out by the portable loop: NaN becomes 0, values below INT32_MIN
 * become INT32_MIN, values at or above 2^31 become INT32_MAX, and everything
 * else is converted with a plain cast.  The other branches are
 * target-specific equivalents; the SIMDE_FAST_CONVERSION_RANGE branch does a
 * bare vector conversion with no clamping or NaN handling. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_trunc_sat_f32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_trunc_sat_f32x4(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32);
    #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)
      /* The destination must be the integer view; converting into r_.f32
       * would merely copy the float lanes and return their raw bits. */
      SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* 2147483520.0 is the float constant the mask compares against; lanes
       * strictly above it are forced to INT32_MAX below. */
      const __m128i i32_max_mask = _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(2147483520.0))));
      const __m128 clamped = _mm_max_ps(a_.sse_m128, _mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)));
      r_.sse_m128i = _mm_cvttps_epi32(clamped);
      #if defined(SIMDE_X86_SSE4_1_NATIVE)
        r_.sse_m128i =
          _mm_castps_si128(
            _mm_blendv_ps(
              _mm_castsi128_ps(r_.sse_m128i),
              _mm_castsi128_ps(_mm_set1_epi32(INT32_MAX)),
              _mm_castsi128_ps(i32_max_mask)
            )
          );
      #else
        r_.sse_m128i =
          _mm_or_si128(
            _mm_and_si128(i32_max_mask, _mm_set1_epi32(INT32_MAX)),
            _mm_andnot_si128(i32_max_mask, r_.sse_m128i)
          );
      #endif
      /* Zero the lanes whose input was NaN (unordered with itself). */
      r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128)));
    #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE)
      SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32);

      /* Patch lanes above the representable maximum to INT32_MAX. */
      const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0) };
      __typeof__(r_.i32) max_mask = HEDLEY_REINTERPRET_CAST(__typeof__(max_mask), a_.f32 > max_representable);
      __typeof__(r_.i32) max_i32 = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX };
      r_.i32 = (max_i32 & max_mask) | (r_.i32 & ~max_mask);

      /* Patch lanes below the representable minimum to INT32_MIN. */
      const __typeof__(a_.f32) min_representable = { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) };
      __typeof__(r_.i32) min_mask = HEDLEY_REINTERPRET_CAST(__typeof__(min_mask), a_.f32 < min_representable);
      __typeof__(r_.i32) min_i32 = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN };
      r_.i32 = (min_i32 & min_mask) | (r_.i32 & ~min_mask);

      /* NaN compares unequal to itself, so this clears NaN lanes to 0. */
      r_.i32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == a_.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        if (simde_math_isnanf(a_.f32[i])) {
          r_.i32[i] = INT32_C(0);
        } else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) {
          r_.i32[i] = INT32_MIN;
        } else if (a_.f32[i] >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)) {
          /* INT32_MAX is not representable in single precision; the cast
           * yields 2^31 (assuming simde_float32 is IEEE binary32), which is
           * itself out of range for int32_t.  Hence >= rather than >: an
           * input of exactly 2^31 must saturate instead of reaching the
           * (undefined) cast below. */
          r_.i32[i] = INT32_MAX;
        } else {
          r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f32[i]);
        }
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_i32x4_trunc_sat_f32x4(a) simde_wasm_i32x4_trunc_sat_f32x4((a))
#endif
/* Saturating float -> unsigned 32-bit integer conversion, lane by lane.
 *
 * As spelled out by the portable loop: NaN and negative inputs become 0,
 * inputs at or above 2^32 become UINT32_MAX, and everything else is
 * converted with a plain cast.  The other branches are target-specific
 * equivalents. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_trunc_sat_f32x4 (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_trunc_sat_f32x4(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vcvtq_u32_f32(a_.neon_f32);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      #if defined(SIMDE_X86_AVX512VL_NATIVE)
        r_.sse_m128i = _mm_cvttps_epu32(a_.sse_m128);
      #else
        /* Only a signed conversion is available here.  For lanes >= 2^32,
         * subtract 2^32 before converting and flip the sign bit afterwards;
         * neg_zero_if_too_high holds 0x80000000 in exactly those lanes. */
        __m128 first_oob_high = _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0));
        __m128 neg_zero_if_too_high =
          _mm_castsi128_ps(
            _mm_slli_epi32(
              _mm_castps_si128(_mm_cmple_ps(first_oob_high, a_.sse_m128)),
              31
            )
          );
        r_.sse_m128i =
          _mm_xor_si128(
            _mm_cvttps_epi32(
              _mm_sub_ps(a_.sse_m128, _mm_and_ps(neg_zero_if_too_high, first_oob_high))
            ),
            _mm_castps_si128(neg_zero_if_too_high)
          );
      #endif

      #if !defined(SIMDE_FAST_CONVERSION_RANGE)
        /* Clear lanes whose input is not > 0, then set all bits in lanes
         * whose input is >= 2^32. */
        r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(0.0)))));
        r_.sse_m128i = _mm_or_si128 (r_.sse_m128i, _mm_castps_si128(_mm_cmpge_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0)))));
      #endif

      #if !defined(SIMDE_FAST_NANS)
        /* Zero the lanes whose input was NaN (unordered with itself). */
        r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128)));
      #endif
    #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE)
      SIMDE_CONVERT_VECTOR_(r_.u32, a_.f32);

      /* Lanes above the threshold get all bits set (UINT32_MAX). */
      const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0) };
      r_.u32 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > max_representable);

      /* Lanes that are not > 0 are cleared; the remaining initialisers of
       * min_representable default to zero. */
      const __typeof__(a_.f32) min_representable = { SIMDE_FLOAT32_C(0.0), };
      r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > min_representable);

      /* NaN compares unequal to itself, so this clears NaN lanes to 0. */
      r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 == a_.f32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        /* The lane is single precision, so use the float classifier, as the
         * signed variant of this function does. */
        if (simde_math_isnanf(a_.f32[i]) ||
            a_.f32[i] < SIMDE_FLOAT32_C(0.0)) {
          r_.u32[i] = UINT32_C(0);
        } else if (a_.f32[i] >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)) {
          /* UINT32_MAX is not representable in single precision; the cast
           * yields 2^32 (assuming simde_float32 is IEEE binary32), which is
           * out of range for uint32_t.  Hence >= rather than >: an input of
           * exactly 2^32 must saturate instead of reaching the (undefined)
           * cast below. */
          r_.u32[i] = UINT32_MAX;
        } else {
          r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f32[i]);
        }
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_trunc_sat_f32x4(a) simde_wasm_u32x4_trunc_sat_f32x4((a))
#endif
/* Saturating double -> signed 32-bit conversion of the two f64 lanes of `a`.
 *
 * As spelled out by the portable loop: for i = 0, 1 a NaN becomes 0, values
 * below INT32_MIN become INT32_MIN, values above INT32_MAX become INT32_MAX,
 * and everything else is converted with a plain cast into result.i32[i].
 * Result lanes 2 and 3 are set to zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_trunc_sat_f64x2_zero (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i32x4_trunc_sat_f64x2_zero(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Convert to 64-bit integers, saturating-narrow to 32 bits, and place the
 * two results in the low half with a zero high half. */
r_.neon_i32 = vcombine_s32(vqmovn_s64(vcvtq_s64_f64(a_.neon_f64)), vdup_n_s32(INT32_C(0)));
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
/* AND-ing with the self-equality mask turns NaN lanes into all-zero bits
 * before the conversion. */
SIMDE_POWER_ALTIVEC_VECTOR(double) in_not_nan =
vec_and(a_.altivec_f64, vec_cmpeq(a_.altivec_f64, a_.altivec_f64));
r_.altivec_i32 = vec_signede(in_not_nan);
/* Both variants below combine the converted values with a zero vector;
 * NOTE(review): presumably this packs the two results into the low lanes
 * and zeroes the rest -- confirm lane order against the target endianness. */
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
r_.altivec_i32 =
vec_pack(
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_i32),
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0))
);
#else
/* Byte permute: bytes 0-7 of the first operand followed by bytes 0-7 of the
 * second (indices 16-23), which is the zero vector. */
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = {
0, 1, 2, 3, 4, 5, 6, 7,
16, 17, 18, 19, 20, 21, 22, 23
};
r_.altivec_i32 =
HEDLEY_REINTERPRET_CAST(
SIMDE_POWER_ALTIVEC_VECTOR(signed int),
vec_perm(
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), r_.altivec_i32),
vec_splat_s8(0),
perm
)
);
#endif
#else
/* The loop bound is the number of f64 lanes (two), not i32 lanes. */
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
if (simde_math_isnan(a_.f64[i])) {
r_.i32[i] = INT32_C(0);
} else if (a_.f64[i] < HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) {
r_.i32[i] = INT32_MIN;
} else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)) {
r_.i32[i] = INT32_MAX;
} else {
r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f64[i]);
}
}
/* The "_zero" part: the upper two result lanes are cleared. */
r_.i32[2] = 0;
r_.i32[3] = 0;
#endif
return simde_v128_from_private(r_);
#endif
}
/* With native aliases enabled, the unprefixed name forwards to the portable
 * simde_ implementation. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i32x4_trunc_sat_f64x2_zero(a) simde_wasm_i32x4_trunc_sat_f64x2_zero((a))
#endif
/* Saturating double -> unsigned 32-bit conversion of the two f64 lanes of
 * `a`.
 *
 * As spelled out by the portable loop: for i = 0, 1 a NaN or negative input
 * becomes 0, values above UINT32_MAX become UINT32_MAX, and everything else
 * is converted with a plain cast into result.u32[i].  Result lanes 2 and 3
 * are set to zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_u32x4_trunc_sat_f64x2_zero (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_trunc_sat_f64x2_zero(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Convert to 64-bit integers, saturating-narrow to 32 bits, and place
       * the two results in the low half with a zero high half. */
      r_.neon_u32 = vcombine_u32(vqmovn_u64(vcvtq_u64_f64(a_.neon_f64)), vdup_n_u32(UINT32_C(0)));
    #else
      /* The loop bound is the number of f64 lanes (two), not u32 lanes. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
        /* The lane is double precision, so use the double classifier, as
         * the signed variant of this function does. */
        if (simde_math_isnan(a_.f64[i]) ||
            a_.f64[i] < SIMDE_FLOAT64_C(0.0)) {
          r_.u32[i] = UINT32_C(0);
        } else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, UINT32_MAX)) {
          r_.u32[i] = UINT32_MAX;
        } else {
          r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f64[i]);
        }
      }
      /* The "_zero" part: the upper two result lanes are cleared. */
      r_.u32[2] = 0;
      r_.u32[3] = 0;
    #endif

    return simde_v128_from_private(r_);
  #endif
}
/* Unprefixed alias, only when native aliases are requested. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
  #define wasm_u32x4_trunc_sat_f64x2_zero(a) simde_wasm_u32x4_trunc_sat_f64x2_zero((a))
#endif
/* popcnt */
/* Per-byte population count: each of the sixteen 8-bit result lanes holds
 * the number of set bits in the corresponding byte of `a`.
 *
 * The branches are compile-time alternatives; the final #else loop is the
 * portable reference. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i8x16_popcnt (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_i8x16_popcnt(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vcntq_s8(a_.neon_i8);
#elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE)
r_.sse_m128i = _mm_popcnt_epi8(a_.sse_m128i);
#elif defined(SIMDE_X86_AVX2_NATIVE)
/* Nibble lookup: split every byte into its low nibble (y) and high nibble
 * (tmp1, shifted down by 4), look both up in a 16-entry table of bit counts
 * via _mm_shuffle_epi8, and add the two counts. */
__m128i tmp0 = _mm_set1_epi8(0x0f);
__m128i tmp1 = _mm_andnot_si128(tmp0, a_.sse_m128i);
__m128i y = _mm_and_si128(tmp0, a_.sse_m128i);
/* Table of popcounts for 0..15; _mm_set_epi8 lists elements from the
 * highest index down to the lowest. */
tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp1 = _mm_srli_epi16(tmp1, 4);
y = _mm_shuffle_epi8(tmp0, y);
tmp1 = _mm_shuffle_epi8(tmp0, tmp1);
/* NOTE(review): this returns an __m128i directly instead of going through
 * r_ / simde_v128_from_private like the other branches; presumably
 * simde_v128_t is __m128i on this target -- confirm. */
return _mm_add_epi8(y, tmp1);
#elif defined(SIMDE_X86_SSSE3_NATIVE)
/* Same nibble-lookup scheme as above, with the table materialised twice. */
__m128i tmp0 = _mm_set1_epi8(0x0f);
__m128i tmp1 = _mm_and_si128(a_.sse_m128i, tmp0);
tmp0 = _mm_andnot_si128(tmp0, a_.sse_m128i);
__m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp0 = _mm_srli_epi16(tmp0, 4);
y = _mm_shuffle_epi8(y, tmp1);
tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp1 = _mm_shuffle_epi8(tmp1, tmp0);
/* NOTE(review): direct __m128i return, see the note in the AVX2 branch. */
return _mm_add_epi8(y, tmp1);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* Bit-parallel count without a byte shuffle: sum adjacent bits into 2-bit
 * fields (mask 0x55), then into 4-bit fields (mask 0x33), then add the two
 * nibbles of each byte and keep the low nibble (mask 0x0f).  The 16-bit
 * shifts are followed by byte masks so bits do not leak between bytes. */
__m128i tmp0 = _mm_and_si128(_mm_srli_epi16(a_.sse_m128i, 1), _mm_set1_epi8(0x55));
__m128i tmp1 = _mm_sub_epi8(a_.sse_m128i, tmp0);
tmp0 = tmp1;
tmp1 = _mm_and_si128(tmp1, _mm_set1_epi8(0x33));
tmp0 = _mm_and_si128(_mm_srli_epi16(tmp0, 2), _mm_set1_epi8(0x33));
tmp1 = _mm_add_epi8(tmp1, tmp0);
tmp0 = _mm_srli_epi16(tmp1, 4);
tmp1 = _mm_add_epi8(tmp1, tmp0);
r_.sse_m128i = _mm_and_si128(tmp1, _mm_set1_epi8(0x0f));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a_.altivec_i8)));
#else
/* Scalar form of the same bit-parallel count: 85 = 0x55, 51 = 0x33,
 * 15 = 0x0f.  The final shift amount is (sizeof(uint8_t) - 1) * CHAR_BIT,
 * i.e. zero for a one-byte type, so it leaves v unchanged. */
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]);
v = v - ((v >> 1) & (85));
v = (v & (51)) + ((v >> (2)) & (51));
v = (v + (v >> (4))) & (15);
r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT;
}
#endif
return simde_v128_from_private(r_);
#endif
}
/* With native aliases enabled, the unprefixed name forwards to the portable
 * simde_ implementation. */
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i8x16_popcnt(a) simde_wasm_i8x16_popcnt((a))
#endif
/* dot */
/* Pairwise dot product of signed 16-bit lanes.
 *
 * For each 32-bit result lane k:
 *   r[k] = a.i16[2k] * b.i16[2k] + a.i16[2k + 1] * b.i16[2k + 1]
 * Each product fits in 32 bits, but the sum of two products does not when
 * all four inputs are INT16_MIN; the portable loop therefore adds in
 * unsigned arithmetic so that case wraps instead of overflowing a signed
 * int. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_i32x4_dot_i16x8 (simde_v128_t a, simde_v128_t b) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_dot_i16x8(a, b);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      b_ = simde_v128_to_private(b),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128i = _mm_madd_epi16(a_.sse_m128i, b_.sse_m128i);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Widening multiplies of the low and high halves, then pairwise add. */
      int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16),  vget_low_s16(b_.neon_i16));
      int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16);
      r_.neon_i32 = vpaddq_s32(pl, ph);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Same as above using only 64-bit pairwise adds. */
      int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16),  vget_low_s16(b_.neon_i16));
      int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16));
      int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl));
      int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph));
      r_.neon_i32 = vcombine_s32(rl, rh);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0));
    #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      /* Widen to eight 32-bit lanes, multiply, then add even and odd lanes. */
      int32_t SIMDE_VECTOR(32) a32, b32, p32;
      SIMDE_CONVERT_VECTOR_(a32, a_.i16);
      SIMDE_CONVERT_VECTOR_(b32, b_.i16);
      p32 = a32 * b32;
      r_.i32 =
        __builtin_shufflevector(p32, p32, 0, 2, 4, 6) +
        __builtin_shufflevector(p32, p32, 1, 3, 5, 7);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) {
        const int32_t even = HEDLEY_STATIC_CAST(int32_t, a_.i16[i    ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i    ]);
        const int32_t odd  = HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i + 1]);
        /* Unsigned addition wraps; writing through the u32 view of the
         * union stores the same bit pattern the i32 lane would hold. */
        r_.u32[i / 2] = HEDLEY_STATIC_CAST(uint32_t, even) + HEDLEY_STATIC_CAST(uint32_t, odd);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_i32x4_dot_i16x8(a, b) simde_wasm_i32x4_dot_i16x8((a), (b))
#endif
/* ceil */
/* Computes the ceiling of each f32 lane of `a`, using the native WASM
 * intrinsic when available and otherwise the best implementation selected
 * by the platform macros below, ending in a per-lane simde_math_ceilf loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_ceil (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_f32x4_ceil(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.sse_m128 = _mm_round_ps(a_.sse_m128, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* https://github.com/WebAssembly/simd/pull/232 */
/* Truncating float -> int32 conversion; lanes whose result equals
 * INT32_MIN are treated below as out of range. */
const __m128i input_as_i32 = _mm_cvttps_epi32(a_.sse_m128);
const __m128i i32_min = _mm_set1_epi32(INT32_MIN);
/* All bits set for out-of-range lanes; for every other lane only the sign
 * bit is set (because of the OR with i32_min). */
const __m128i input_is_out_of_range = _mm_or_si128(_mm_cmpeq_epi32(input_as_i32, i32_min), i32_min);
/* Bit-select: where the mask is clear take the truncated value converted
 * back to float, where it is set take the original input.  In-range lanes
 * thus keep the input's sign bit and out-of-range lanes pass through
 * unchanged.  The inner castps/castsi128 pair is a no-op round trip. */
const __m128 truncated =
_mm_or_ps(
_mm_andnot_ps(
_mm_castsi128_ps(input_is_out_of_range),
_mm_cvtepi32_ps(input_as_i32)
),
_mm_castsi128_ps(
_mm_castps_si128(
_mm_and_ps(
_mm_castsi128_ps(input_is_out_of_range),
a_.sse_m128
)
)
)
);
/* All bits set where input <= truncated; the sign bit is always set so the
 * final select takes the sign bit from `truncated` in every lane. */
const __m128 trunc_is_ge_input =
_mm_or_ps(
_mm_cmple_ps(a_.sse_m128, truncated),
_mm_castsi128_ps(i32_min)
);
/* Result is `truncated` where it is already >= the input, otherwise
 * `truncated + 1.0`. */
r_.sse_m128 =
_mm_or_ps(
_mm_andnot_ps(
trunc_is_ge_input,
_mm_add_ps(truncated, _mm_set1_ps(SIMDE_FLOAT32_C(1.0)))
),
_mm_and_ps(trunc_is_ge_input, truncated)
);
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
r_.neon_f32 = vrndpq_f32(a_.neon_f32);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
r_.altivec_f32 = vec_ceil(a_.altivec_f32);
#else
/* Portable fallback: one scalar ceilf per lane. */
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
r_.f32[i] = simde_math_ceilf(a_.f32[i]);
}
#endif
return simde_v128_from_private(r_);
#endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_ceil(a) simde_wasm_f32x4_ceil((a))
#endif
/* Computes the ceiling of each f64 lane of `a`, using the native WASM
 * intrinsic when available and otherwise the best implementation selected
 * by the platform macros below, ending in a per-lane simde_math_ceil loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_ceil (simde_v128_t a) {
#if defined(SIMDE_WASM_SIMD128_NATIVE)
return wasm_f64x2_ceil(a);
#else
simde_v128_private
a_ = simde_v128_to_private(a),
r_;
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.sse_m128d = _mm_round_pd(a_.sse_m128d, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* https://github.com/WebAssembly/simd/pull/232 */
/* Mask with every bit except the sign bit set. */
const __m128d all_but_sign_set = _mm_castsi128_pd(_mm_set1_epi64x(INT64_C(0x7FFFFFFFFFFFFFFF)));
/* https://stackoverflow.com/a/55077612 explains this a bit */
/* 4503599627370496 == 2^52. */
const __m128d bignum = _mm_set1_pd(4.50359962737049600000e+15);
/* |a| */
const __m128d sign_cleared = _mm_and_pd(a_.sse_m128d, all_but_sign_set);
/* Lanes where NOT(bignum <= |a|), restricted to the non-sign bits so the
 * select below always keeps the input's sign bit. */
__m128d mask =
_mm_and_pd(
_mm_cmpnle_pd(bignum, sign_cleared),
all_but_sign_set
);
/* Where the mask is set, replace the magnitude with (|a| + 2^52) - 2^52,
 * which drops the fractional part (presumably under the default
 * round-to-nearest mode -- see the link above); elsewhere keep the input. */
const __m128d tmp =
_mm_or_pd(
_mm_andnot_pd(mask, a_.sse_m128d),
_mm_and_pd (mask, _mm_sub_pd(_mm_add_pd(sign_cleared, bignum), bignum))
);
/* Add 1.0 to the lanes where the rounded value ended up below the input;
 * other lanes add +0.0. */
r_.sse_m128d =
_mm_add_pd(
tmp,
_mm_and_pd(_mm_and_pd(_mm_cmplt_pd(tmp, a_.sse_m128d), all_but_sign_set), _mm_set1_pd(1.0))
);
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_f64 = vrndpq_f64(a_.neon_f64);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
r_.altivec_f64 = vec_ceil(a_.altivec_f64);
#else
/* Portable fallback: one scalar ceil per lane. */
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
r_.f64[i] = simde_math_ceil(a_.f64[i]);
}
#endif
return simde_v128_from_private(r_);
#endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f64x2_ceil(a) simde_wasm_f64x2_ceil((a))
#endif
/* floor */
/* Computes the floor of each f32 lane of `a`, using the native WASM
 * intrinsic when available and otherwise the best implementation selected
 * by the platform macros below, ending in a per-lane simde_math_floorf loop.
 *
 * The NEON branches read the input through the `neon_f32` member of the
 * private union, like the other NEON code paths in this file. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_floor (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_floor(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.sse_m128 = _mm_floor_ps(a_.sse_m128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      const __m128i vint_min = _mm_set1_epi32(INT32_MIN);
      /* Truncating conversion; lanes whose result equals INT32_MIN are
       * treated as out of range. */
      const __m128i input_as_int = _mm_cvttps_epi32(a_.sse_m128);
      const __m128 input_truncated = _mm_cvtepi32_ps(input_as_int);
      /* All bits set for out-of-range lanes, sign bit only otherwise. */
      const __m128i oor_all_or_neg = _mm_or_si128(_mm_cmpeq_epi32(input_as_int, vint_min), vint_min);
      /* Bit-select: truncated value (with the input's sign bit) for in-range
       * lanes, the unmodified input for out-of-range lanes. */
      const __m128 tmp =
        _mm_castsi128_ps(
          _mm_or_si128(
            _mm_andnot_si128(
              oor_all_or_neg,
              _mm_castps_si128(input_truncated)
            ),
            _mm_and_si128(
              oor_all_or_neg,
              _mm_castps_si128(a_.sse_m128)
            )
          )
        );
      /* Subtract 1.0 where truncation moved the value above the input. */
      r_.sse_m128 =
        _mm_sub_ps(
          tmp,
          _mm_and_ps(
            _mm_cmplt_ps(
              a_.sse_m128,
              tmp
            ),
            _mm_set1_ps(SIMDE_FLOAT32_C(1.0))
          )
        );
    #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
      r_.neon_f32 = vrndmq_f32(a_.neon_f32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      const int32x4_t input_as_int = vcvtq_s32_f32(a_.neon_f32);
      const float32x4_t input_truncated = vcvtq_f32_s32(input_as_int);
      /* Select the truncated magnitude only where |a| is below the value
       * with bit pattern 0x4B000000 (2^23); the sign bit (0x80000000) is
       * cleared from the mask so it always comes from the input. */
      const float32x4_t tmp =
        vbslq_f32(
          vbicq_u32(
            vcagtq_f32(
              vreinterpretq_f32_u32(vdupq_n_u32(UINT32_C(0x4B000000))),
              a_.neon_f32
            ),
            vdupq_n_u32(UINT32_C(0x80000000))
          ),
          input_truncated,
          a_.neon_f32);
      /* Subtract 1.0f (bit pattern 0x3F800000) where tmp > input. */
      r_.neon_f32 =
        vsubq_f32(
          tmp,
          vreinterpretq_f32_u32(
            vandq_u32(
              vcgtq_f32(
                tmp,
                a_.neon_f32
              ),
              vdupq_n_u32(UINT32_C(0x3F800000))
            )
          )
        );
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
      r_.altivec_f32 = vec_floor(a_.altivec_f32);
    #else
      /* Portable fallback: one scalar floorf per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_floorf(a_.f32[i]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_floor(a) simde_wasm_f32x4_floor((a))
#endif
/* Floor of each f64 lane of `a`: defers to the native WASM intrinsic when
 * available, otherwise applies simde_math_floor lane by lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_floor (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_floor(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private dst_;
    const size_t lane_count = sizeof(dst_.f64) / sizeof(dst_.f64[0]);

    SIMDE_VECTORIZE
    for (size_t lane = 0 ; lane < lane_count ; lane++) {
      dst_.f64[lane] = simde_math_floor(src_.f64[lane]);
    }

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f64x2_floor(a) simde_wasm_f64x2_floor((a))
#endif
/* trunc */
/* Truncation of each f32 lane of `a`: defers to the native WASM intrinsic
 * when available, otherwise applies simde_math_truncf lane by lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_trunc (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_trunc(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private dst_;
    const size_t lane_count = sizeof(dst_.f32) / sizeof(dst_.f32[0]);

    SIMDE_VECTORIZE
    for (size_t lane = 0 ; lane < lane_count ; lane++) {
      dst_.f32[lane] = simde_math_truncf(src_.f32[lane]);
    }

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_trunc(a) simde_wasm_f32x4_trunc((a))
#endif
/* Truncation of each f64 lane of `a`: defers to the native WASM intrinsic
 * when available, otherwise applies simde_math_trunc lane by lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_trunc (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_trunc(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private dst_;
    const size_t lane_count = sizeof(dst_.f64) / sizeof(dst_.f64[0]);

    SIMDE_VECTORIZE
    for (size_t lane = 0 ; lane < lane_count ; lane++) {
      dst_.f64[lane] = simde_math_trunc(src_.f64[lane]);
    }

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f64x2_trunc(a) simde_wasm_f64x2_trunc((a))
#endif
/* nearest */
/* Rounds each f32 lane of `a` to the nearest integral value.
 *
 * WebAssembly's `nearest` resolves ties to the even neighbour (2.5 -> 2.0,
 * 3.5 -> 4.0), so the portable path uses the round-half-to-even helper
 * rather than roundf(), which rounds ties away from zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_nearest (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_nearest(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_roundevenf(a_.f32[i]);
    }

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_nearest(a) simde_wasm_f32x4_nearest((a))
#endif
/* Rounds each f64 lane of `a` to the nearest integral value.
 *
 * WebAssembly's `nearest` resolves ties to the even neighbour (2.5 -> 2.0,
 * 3.5 -> 4.0), so the portable path uses the round-half-to-even helper
 * rather than round(), which rounds ties away from zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_nearest (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_nearest(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_roundeven(a_.f64[i]);
    }

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f64x2_nearest(a) simde_wasm_f64x2_nearest((a))
#endif
/* sqrt */
/* Square root of each f32 lane of `a`: defers to the native WASM intrinsic
 * when available, then to an SSE, AArch64 NEON or AltiVec vector
 * instruction, and finally to simde_math_sqrtf applied lane by lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f32x4_sqrt (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_sqrt(a);
  #else
    simde_v128_private src_ = simde_v128_to_private(a);
    simde_v128_private dst_;

    #if defined(SIMDE_X86_SSE_NATIVE)
      dst_.sse_m128 = _mm_sqrt_ps(src_.sse_m128);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      dst_.neon_f32 = vsqrtq_f32(src_.neon_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      dst_.altivec_f32 = vec_sqrt(src_.altivec_f32);
    #else
      const size_t lane_count = sizeof(dst_.f32) / sizeof(dst_.f32[0]);

      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lane_count ; lane++) {
        dst_.f32[lane] = simde_math_sqrtf(src_.f32[lane]);
      }
    #endif

    return simde_v128_from_private(dst_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f32x4_sqrt(a) simde_wasm_f32x4_sqrt((a))
#endif
/* Square root of each f64 lane of `a`: defers to the native WASM intrinsic
 * when available, then to an SSE2, AArch64 NEON or AltiVec vector
 * instruction, and finally to simde_math_sqrt applied lane by lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_v128_t
simde_wasm_f64x2_sqrt (simde_v128_t a) {
  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_sqrt(a);
  #else
    simde_v128_private
      a_ = simde_v128_to_private(a),
      r_;

    /* _mm_sqrt_pd and the __m128d type are SSE2, not SSE, so an SSE-only
     * build must fall through to the portable loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.sse_m128d = _mm_sqrt_pd(a_.sse_m128d);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vsqrtq_f64(a_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_sqrt(a_.altivec_f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sqrt(a_.f64[i]);
      }
    #endif

    return simde_v128_from_private(r_);
  #endif
}
#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES)
#define wasm_f64x2_sqrt(a) simde_wasm_f64x2_sqrt((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_WASM_SIMD128_H) */