#if !defined(SIMDE_X86_AVX512_DBSAD_H)
#define SIMDE_X86_AVX512_DBSAD_H

#include "types.h"
#include "mov.h"
#include "../avx2.h"
#include "shuffle.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

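/* An explanatory note (a hedged summary of dbsad/VDBPSADBW semantics,
 * not part of the original source): b is first shuffled at 32-bit
 * granularity, selected by imm8 as in _mm_shuffle_epi32 (per 128-bit
 * lane for the wider forms).  Each 16-bit result element is then a sum
 * of four byte-wise absolute differences taken at overlapping offsets;
 * writing b' for the shuffled b, the low 64 bits of each lane are:
 *   r16[0] = SAD(a[0..3], b'[0..3])
 *   r16[1] = SAD(a[0..3], b'[1..4])
 *   r16[2] = SAD(a[4..7], b'[2..5])
 *   r16[3] = SAD(a[4..7], b'[3..6])
 * and the high 64 bits repeat the pattern at byte offset 8.  The
 * overlapping windows are what make this useful for block matching,
 * e.g. motion estimation. */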
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm_dbsad_epu8(a, b, imm8) _mm_dbsad_epu8((a), (b), (imm8))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde__m128i
  simde_mm_dbsad_epu8_internal_ (simde__m128i a, simde__m128i b) {
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);
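
    /* Vectorized strategy (an explanatory note, not from the original
     * source): each 16-bit result needs |a - b| at four byte positions.
     * a1/b1 below gather the first two bytes of every such quadruplet
     * and a2/b2 the last two, so two passes of a branchless byte-wise
     * absolute difference ((x - y) selected by a comparison mask) plus
     * a widening pairwise add produce all the sums without a loop. */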
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      uint8_t a1 SIMDE_VECTOR(16) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 16, a_.u8, a_.u8,
          0, 1, 0, 1,
          4, 5, 4, 5,
          8, 9, 8, 9,
          12, 13, 12, 13);
      uint8_t b1 SIMDE_VECTOR(16) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 16, b_.u8, b_.u8,
          0, 1, 1, 2,
          2, 3, 3, 4,
          8, 9, 9, 10,
          10, 11, 11, 12);

      __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1);
      __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask));

      r_.u16 =
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16));

      uint8_t a2 SIMDE_VECTOR(16) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 16, a_.u8, a_.u8,
          2, 3, 2, 3,
          6, 7, 6, 7,
          10, 11, 10, 11,
          14, 15, 14, 15);
      uint8_t b2 SIMDE_VECTOR(16) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 16, b_.u8, b_.u8,
          2, 3, 3, 4,
          4, 5, 5, 6,
          10, 11, 11, 12,
          12, 13, 13, 14);

      __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2);
      __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask));

      r_.u16 +=
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16));
    #else
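      /* Portable scalar fallback.  The bit twiddling encodes the offset
       * pattern described at the top of this file: ((i << 1) & 12)
       * selects a's quadruplet (bytes 0-3, 4-7, 8-11 or 12-15), while
       * ((i & 3) | ((i << 1) & 8)) gives b's sliding offset (0, 1, 2, 3
       * for the low half, 8, 9, 10, 11 for the high half). */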
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = 0;
        for (size_t j = 0 ; j < 4 ; j++) {
          uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[((i << 1) & 12) + j]);
          uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8)) + j]);
          r_.u16[i] += (A < B) ? (B - A) : (A - B);
        }
      }
    #endif

    return simde__m128i_from_private(r_);
  }
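  /* The imm8-controlled dword shuffle of b is folded into
   * simde_mm_shuffle_epi32 at the macro level, so imm8 stays a
   * compile-time constant and the helper above can remain an ordinary
   * function. */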
  #define simde_mm_dbsad_epu8(a, b, imm8) simde_mm_dbsad_epu8_internal_((a), simde_mm_shuffle_epi32((b), (imm8)))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_dbsad_epu8
  #define _mm_dbsad_epu8(a, b, imm8) simde_mm_dbsad_epu8(a, b, imm8)
#endif
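
/* When the natives are unavailable, the masked variants below are built
 * by composition: compute the full dbsad result, then blend it with src
 * (mask) or zero (maskz) via the corresponding mov helpers. */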
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm_mask_dbsad_epu8(src, k, a, b, imm8) _mm_mask_dbsad_epu8((src), (k), (a), (b), (imm8))
#else
  #define simde_mm_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm_mask_mov_epi16(src, k, simde_mm_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_dbsad_epu8
  #define _mm_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm_mask_dbsad_epu8(src, k, a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm_maskz_dbsad_epu8(k, a, b, imm8) _mm_maskz_dbsad_epu8((k), (a), (b), (imm8))
#else
  #define simde_mm_maskz_dbsad_epu8(k, a, b, imm8) simde_mm_maskz_mov_epi16(k, simde_mm_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_dbsad_epu8
  #define _mm_maskz_dbsad_epu8(k, a, b, imm8) simde_mm_maskz_dbsad_epu8(k, a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm256_dbsad_epu8(a, b, imm8) _mm256_dbsad_epu8((a), (b), (imm8))
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_)
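  /* On targets whose natural vector size is 128 bits, process each
   * 128-bit half with the narrower implementation.  The long
   * simde_mm256_dbsad_epu8_-prefixed names presumably guard against
   * shadowing identifiers at the macro's expansion site. */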
  #define simde_mm256_dbsad_epu8(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \
    simde__m256i_private \
      simde_mm256_dbsad_epu8_a_ = simde__m256i_to_private(a), \
      simde_mm256_dbsad_epu8_b_ = simde__m256i_to_private(b); \
    \
    simde_mm256_dbsad_epu8_a_.m128i[0] = simde_mm_dbsad_epu8(simde_mm256_dbsad_epu8_a_.m128i[0], simde_mm256_dbsad_epu8_b_.m128i[0], imm8); \
    simde_mm256_dbsad_epu8_a_.m128i[1] = simde_mm_dbsad_epu8(simde_mm256_dbsad_epu8_a_.m128i[1], simde_mm256_dbsad_epu8_b_.m128i[1], imm8); \
    \
    simde__m256i_from_private(simde_mm256_dbsad_epu8_a_); \
  }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde__m256i
  simde_mm256_dbsad_epu8_internal_ (simde__m256i a, simde__m256i b) {
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      uint8_t a1 SIMDE_VECTOR(32) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 32, a_.u8, a_.u8,
          0, 1, 0, 1,
          4, 5, 4, 5,
          8, 9, 8, 9,
          12, 13, 12, 13,
          16, 17, 16, 17,
          20, 21, 20, 21,
          24, 25, 24, 25,
          28, 29, 28, 29);
      uint8_t b1 SIMDE_VECTOR(32) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 32, b_.u8, b_.u8,
          0, 1, 1, 2,
          2, 3, 3, 4,
          8, 9, 9, 10,
          10, 11, 11, 12,
          16, 17, 17, 18,
          18, 19, 19, 20,
          24, 25, 25, 26,
          26, 27, 27, 28);

      __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1);
      __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask));

      r_.u16 =
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31), __typeof__(r_.u16));

      uint8_t a2 SIMDE_VECTOR(32) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 32, a_.u8, a_.u8,
          2, 3, 2, 3,
          6, 7, 6, 7,
          10, 11, 10, 11,
          14, 15, 14, 15,
          18, 19, 18, 19,
          22, 23, 22, 23,
          26, 27, 26, 27,
          30, 31, 30, 31);
      uint8_t b2 SIMDE_VECTOR(32) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 32, b_.u8, b_.u8,
          2, 3, 3, 4,
          4, 5, 5, 6,
          10, 11, 11, 12,
          12, 13, 13, 14,
          18, 19, 19, 20,
          20, 21, 21, 22,
          26, 27, 27, 28,
          28, 29, 29, 30);

      __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2);
      __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask));

      r_.u16 +=
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31), __typeof__(r_.u16));
    #else
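      /* Same index pattern as the 128-bit fallback, with one extra
       * term: ((i & 8) << 1) adds 16 for results in the upper 128-bit
       * lane, since the operation is performed per 128-bit lane. */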
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = 0;
        for (size_t j = 0 ; j < 4 ; j++) {
          uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(((i << 1) & 12) | ((i & 8) << 1)) + j]);
          uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8) | ((i & 8) << 1)) + j]);
          r_.u16[i] += (A < B) ? (B - A) : (A - B);
        }
      }
    #endif

    return simde__m256i_from_private(r_);
  }
  #define simde_mm256_dbsad_epu8(a, b, imm8) simde_mm256_dbsad_epu8_internal_((a), simde_mm256_shuffle_epi32((b), (imm8)))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_dbsad_epu8
  #define _mm256_dbsad_epu8(a, b, imm8) simde_mm256_dbsad_epu8(a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8) _mm256_mask_dbsad_epu8((src), (k), (a), (b), (imm8))
#else
  #define simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm256_mask_mov_epi16(src, k, simde_mm256_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_dbsad_epu8
  #define _mm256_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
  #define simde_mm256_maskz_dbsad_epu8(k, a, b, imm8) _mm256_maskz_dbsad_epu8((k), (a), (b), (imm8))
#else
  #define simde_mm256_maskz_dbsad_epu8(k, a, b, imm8) simde_mm256_maskz_mov_epi16(k, simde_mm256_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_dbsad_epu8
  #define _mm256_maskz_dbsad_epu8(k, a, b, imm8) simde_mm256_maskz_dbsad_epu8(k, a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE)
  #define simde_mm512_dbsad_epu8(a, b, imm8) _mm512_dbsad_epu8((a), (b), (imm8))
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_)
  #define simde_mm512_dbsad_epu8(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \
    simde__m512i_private \
      simde_mm512_dbsad_epu8_a_ = simde__m512i_to_private(a), \
      simde_mm512_dbsad_epu8_b_ = simde__m512i_to_private(b); \
    \
    simde_mm512_dbsad_epu8_a_.m256i[0] = simde_mm256_dbsad_epu8(simde_mm512_dbsad_epu8_a_.m256i[0], simde_mm512_dbsad_epu8_b_.m256i[0], imm8); \
    simde_mm512_dbsad_epu8_a_.m256i[1] = simde_mm256_dbsad_epu8(simde_mm512_dbsad_epu8_a_.m256i[1], simde_mm512_dbsad_epu8_b_.m256i[1], imm8); \
    \
    simde__m512i_from_private(simde_mm512_dbsad_epu8_a_); \
  }))
#else
  SIMDE_FUNCTION_ATTRIBUTES
  simde__m512i
  simde_mm512_dbsad_epu8_internal_ (simde__m512i a, simde__m512i b) {
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      uint8_t a1 SIMDE_VECTOR(64) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 64, a_.u8, a_.u8,
          0, 1, 0, 1,
          4, 5, 4, 5,
          8, 9, 8, 9,
          12, 13, 12, 13,
          16, 17, 16, 17,
          20, 21, 20, 21,
          24, 25, 24, 25,
          28, 29, 28, 29,
          32, 33, 32, 33,
          36, 37, 36, 37,
          40, 41, 40, 41,
          44, 45, 44, 45,
          48, 49, 48, 49,
          52, 53, 52, 53,
          56, 57, 56, 57,
          60, 61, 60, 61);
      uint8_t b1 SIMDE_VECTOR(64) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 64, b_.u8, b_.u8,
          0, 1, 1, 2,
          2, 3, 3, 4,
          8, 9, 9, 10,
          10, 11, 11, 12,
          16, 17, 17, 18,
          18, 19, 19, 20,
          24, 25, 25, 26,
          26, 27, 27, 28,
          32, 33, 33, 34,
          34, 35, 35, 36,
          40, 41, 41, 42,
          42, 43, 43, 44,
          48, 49, 49, 50,
          50, 51, 51, 52,
          56, 57, 57, 58,
          58, 59, 59, 60);

      __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1);
      __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask));

      r_.u16 =
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63), __typeof__(r_.u16));

      uint8_t a2 SIMDE_VECTOR(64) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 64, a_.u8, a_.u8,
          2, 3, 2, 3,
          6, 7, 6, 7,
          10, 11, 10, 11,
          14, 15, 14, 15,
          18, 19, 18, 19,
          22, 23, 22, 23,
          26, 27, 26, 27,
          30, 31, 30, 31,
          34, 35, 34, 35,
          38, 39, 38, 39,
          42, 43, 42, 43,
          46, 47, 46, 47,
          50, 51, 50, 51,
          54, 55, 54, 55,
          58, 59, 58, 59,
          62, 63, 62, 63);
      uint8_t b2 SIMDE_VECTOR(64) =
        SIMDE_SHUFFLE_VECTOR_(
          8, 64, b_.u8, b_.u8,
          2, 3, 3, 4,
          4, 5, 5, 6,
          10, 11, 11, 12,
          12, 13, 13, 14,
          18, 19, 19, 20,
          20, 21, 21, 22,
          26, 27, 27, 28,
          28, 29, 29, 30,
          34, 35, 35, 36,
          36, 37, 37, 38,
          42, 43, 43, 44,
          44, 45, 45, 46,
          50, 51, 51, 52,
          52, 53, 53, 54,
          58, 59, 59, 60,
          60, 61, 61, 62);

      __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2);
      __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask));

      r_.u16 +=
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62), __typeof__(r_.u16)) +
        __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63), __typeof__(r_.u16));
    #else
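      /* As in the narrower fallbacks; here ((i & 24) << 1) adds 16, 32
       * or 48 to select among the four 128-bit lanes. */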
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = 0;
        for (size_t j = 0 ; j < 4 ; j++) {
          uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(((i << 1) & 12) | ((i & 24) << 1)) + j]);
          uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8) | ((i & 24) << 1)) + j]);
          r_.u16[i] += (A < B) ? (B - A) : (A - B);
        }
      }
    #endif

    return simde__m512i_from_private(r_);
  }
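  /* simde_mm512_shuffle_ps on the float reinterpretation performs the
   * same per-128-bit-lane dword selection as a shuffle_epi32 would,
   * since both source operands are b. */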
  #define simde_mm512_dbsad_epu8(a, b, imm8) simde_mm512_dbsad_epu8_internal_((a), simde_mm512_castps_si512(simde_mm512_shuffle_ps(simde_mm512_castsi512_ps(b), simde_mm512_castsi512_ps(b), imm8)))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_dbsad_epu8
  #define _mm512_dbsad_epu8(a, b, imm8) simde_mm512_dbsad_epu8(a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE)
  #define simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8) _mm512_mask_dbsad_epu8((src), (k), (a), (b), (imm8))
#else
  #define simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm512_mask_mov_epi16(src, k, simde_mm512_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_dbsad_epu8
  #define _mm512_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8)
#endif

#if defined(SIMDE_X86_AVX512BW_NATIVE)
  #define simde_mm512_maskz_dbsad_epu8(k, a, b, imm8) _mm512_maskz_dbsad_epu8((k), (a), (b), (imm8))
#else
  #define simde_mm512_maskz_dbsad_epu8(k, a, b, imm8) simde_mm512_maskz_mov_epi16(k, simde_mm512_dbsad_epu8(a, b, imm8))
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_dbsad_epu8
  #define _mm512_maskz_dbsad_epu8(k, a, b, imm8) simde_mm512_maskz_dbsad_epu8(k, a, b, imm8)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_DBSAD_H) */