702 lines
21 KiB
C
702 lines
21 KiB
C
#if !defined(SIMDE_X86_AVX512_COMPRESS_H)
|
|
#define SIMDE_X86_AVX512_COMPRESS_H
|
|
|
|
#include "types.h"
|
|
|
|
HEDLEY_DIAGNOSTIC_PUSH
|
|
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
SIMDE_BEGIN_DECLS_
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256d
|
|
simde_mm256_mask_compress_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_mask_compress_pd(src, k, a);
|
|
#else
|
|
simde__m256d_private
|
|
a_ = simde__m256d_to_private(a),
|
|
src_ = simde__m256d_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) {
|
|
a_.f64[ri] = src_.f64[ri];
|
|
}
|
|
|
|
return simde__m256d_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: when both ENABLE_NATIVE_ALIASES macros are defined, the Intel
 * name forwards to the portable simde_ implementation (the previous
 * definition expanded to itself and never reached it). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compress_pd
  #define _mm256_mask_compress_pd(src, k, a) simde_mm256_mask_compress_pd(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm256_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m256d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm256_mask_compressstoreu_pd(base_addr, k, a);
|
|
#else
|
|
simde__m256d_private
|
|
a_ = simde__m256d_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compressstoreu_pd
  #define _mm256_mask_compressstoreu_pd(base_addr, k, a) simde_mm256_mask_compressstoreu_pd(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256d
|
|
simde_mm256_maskz_compress_pd (simde__mmask8 k, simde__m256d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_maskz_compress_pd(k, a);
|
|
#else
|
|
simde__m256d_private
|
|
a_ = simde__m256d_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) {
|
|
a_.f64[ri] = SIMDE_FLOAT64_C(0.0);
|
|
}
|
|
|
|
return simde__m256d_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_compress_pd
  #define _mm256_maskz_compress_pd(k, a) simde_mm256_maskz_compress_pd(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256
|
|
simde_mm256_mask_compress_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_mask_compress_ps(src, k, a);
|
|
#else
|
|
simde__m256_private
|
|
a_ = simde__m256_to_private(a),
|
|
src_ = simde__m256_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) {
|
|
a_.f32[ri] = src_.f32[ri];
|
|
}
|
|
|
|
return simde__m256_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compress_ps
  #define _mm256_mask_compress_ps(src, k, a) simde_mm256_mask_compress_ps(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm256_mask_compressstoreu_ps (void* base_addr, simde__mmask8 k, simde__m256 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm256_mask_compressstoreu_ps(base_addr, k, a);
|
|
#else
|
|
simde__m256_private
|
|
a_ = simde__m256_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation.
 * The #undef now targets the _ps name being redefined (it previously removed
 * the _pd alias instead), and the macro no longer expands to itself. */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compressstoreu_ps
  #define _mm256_mask_compressstoreu_ps(base_addr, k, a) simde_mm256_mask_compressstoreu_ps(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256
|
|
simde_mm256_maskz_compress_ps (simde__mmask8 k, simde__m256 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_maskz_compress_ps(k, a);
|
|
#else
|
|
simde__m256_private
|
|
a_ = simde__m256_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) {
|
|
a_.f32[ri] = SIMDE_FLOAT32_C(0.0);
|
|
}
|
|
|
|
return simde__m256_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_compress_ps
  #define _mm256_maskz_compress_ps(k, a) simde_mm256_maskz_compress_ps(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256i
|
|
simde_mm256_mask_compress_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_mask_compress_epi32(src, k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a),
|
|
src_ = simde__m256i_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) {
|
|
a_.i32[ri] = src_.i32[ri];
|
|
}
|
|
|
|
return simde__m256i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compress_epi32
  #define _mm256_mask_compress_epi32(src, k, a) simde_mm256_mask_compress_epi32(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm256_mask_compressstoreu_epi32 (void* base_addr, simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm256_mask_compressstoreu_epi32(base_addr, k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compressstoreu_epi32
  #define _mm256_mask_compressstoreu_epi32(base_addr, k, a) simde_mm256_mask_compressstoreu_epi32(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256i
|
|
simde_mm256_maskz_compress_epi32 (simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_maskz_compress_epi32(k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) {
|
|
a_.f32[ri] = INT32_C(0);
|
|
}
|
|
|
|
return simde__m256i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_compress_epi32
  #define _mm256_maskz_compress_epi32(k, a) simde_mm256_maskz_compress_epi32(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256i
|
|
simde_mm256_mask_compress_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_mask_compress_epi64(src, k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a),
|
|
src_ = simde__m256i_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) {
|
|
a_.i64[ri] = src_.i64[ri];
|
|
}
|
|
|
|
return simde__m256i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compress_epi64
  #define _mm256_mask_compress_epi64(src, k, a) simde_mm256_mask_compress_epi64(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm256_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm256_mask_compressstoreu_epi64(base_addr, k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_compressstoreu_epi64
  #define _mm256_mask_compressstoreu_epi64(base_addr, k, a) simde_mm256_mask_compressstoreu_epi64(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m256i
|
|
simde_mm256_maskz_compress_epi64 (simde__mmask8 k, simde__m256i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm256_maskz_compress_epi64(k, a);
|
|
#else
|
|
simde__m256i_private
|
|
a_ = simde__m256i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])); ri++) {
|
|
a_.i64[ri] = INT64_C(0);
|
|
}
|
|
|
|
return simde__m256i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_compress_epi64
  #define _mm256_maskz_compress_epi64(k, a) simde_mm256_maskz_compress_epi64(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512d
|
|
simde_mm512_mask_compress_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_mask_compress_pd(src, k, a);
|
|
#else
|
|
simde__m512d_private
|
|
a_ = simde__m512d_to_private(a),
|
|
src_ = simde__m512d_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) {
|
|
a_.f64[ri] = src_.f64[ri];
|
|
}
|
|
|
|
return simde__m512d_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compress_pd
  #define _mm512_mask_compress_pd(src, k, a) simde_mm512_mask_compress_pd(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm512_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m512d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm512_mask_compressstoreu_pd(base_addr, k, a);
|
|
#else
|
|
simde__m512d_private
|
|
a_ = simde__m512d_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compressstoreu_pd
  #define _mm512_mask_compressstoreu_pd(base_addr, k, a) simde_mm512_mask_compressstoreu_pd(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512d
|
|
simde_mm512_maskz_compress_pd (simde__mmask8 k, simde__m512d a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_maskz_compress_pd(k, a);
|
|
#else
|
|
simde__m512d_private
|
|
a_ = simde__m512d_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f64[ri++] = a_.f64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) {
|
|
a_.f64[ri] = SIMDE_FLOAT64_C(0.0);
|
|
}
|
|
|
|
return simde__m512d_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_compress_pd
  #define _mm512_maskz_compress_pd(k, a) simde_mm512_maskz_compress_pd(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512
|
|
simde_mm512_mask_compress_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_mask_compress_ps(src, k, a);
|
|
#else
|
|
simde__m512_private
|
|
a_ = simde__m512_to_private(a),
|
|
src_ = simde__m512_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) {
|
|
a_.f32[ri] = src_.f32[ri];
|
|
}
|
|
|
|
return simde__m512_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compress_ps
  #define _mm512_mask_compress_ps(src, k, a) simde_mm512_mask_compress_ps(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm512_mask_compressstoreu_ps (void* base_addr, simde__mmask16 k, simde__m512 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm512_mask_compressstoreu_ps(base_addr, k, a);
|
|
#else
|
|
simde__m512_private
|
|
a_ = simde__m512_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation.
 * The #undef now targets the _ps name being redefined (it previously removed
 * the _pd alias instead), and the macro no longer expands to itself. */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compressstoreu_ps
  #define _mm512_mask_compressstoreu_ps(base_addr, k, a) simde_mm512_mask_compressstoreu_ps(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512
|
|
simde_mm512_maskz_compress_ps (simde__mmask16 k, simde__m512 a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_maskz_compress_ps(k, a);
|
|
#else
|
|
simde__m512_private
|
|
a_ = simde__m512_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.f32[ri++] = a_.f32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) {
|
|
a_.f32[ri] = SIMDE_FLOAT32_C(0.0);
|
|
}
|
|
|
|
return simde__m512_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_compress_ps
  #define _mm512_maskz_compress_ps(k, a) simde_mm512_maskz_compress_ps(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512i
|
|
simde_mm512_mask_compress_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_mask_compress_epi32(src, k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a),
|
|
src_ = simde__m512i_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) {
|
|
a_.i32[ri] = src_.i32[ri];
|
|
}
|
|
|
|
return simde__m512i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compress_epi32
  #define _mm512_mask_compress_epi32(src, k, a) simde_mm512_mask_compress_epi32(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm512_mask_compressstoreu_epi32 (void* base_addr, simde__mmask16 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm512_mask_compressstoreu_epi32(base_addr, k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compressstoreu_epi32
  #define _mm512_mask_compressstoreu_epi32(base_addr, k, a) simde_mm512_mask_compressstoreu_epi32(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512i
|
|
simde_mm512_maskz_compress_epi32 (simde__mmask16 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_maskz_compress_epi32(k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i32[ri++] = a_.i32[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) {
|
|
a_.f32[ri] = INT32_C(0);
|
|
}
|
|
|
|
return simde__m512i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_compress_epi32
  #define _mm512_maskz_compress_epi32(k, a) simde_mm512_maskz_compress_epi32(k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512i
|
|
simde_mm512_mask_compress_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_mask_compress_epi64(src, k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a),
|
|
src_ = simde__m512i_to_private(src);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) {
|
|
a_.i64[ri] = src_.i64[ri];
|
|
}
|
|
|
|
return simde__m512i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compress_epi64
  #define _mm512_mask_compress_epi64(src, k, a) simde_mm512_mask_compress_epi64(src, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
void
|
|
simde_mm512_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
_mm512_mask_compressstoreu_epi64(base_addr, k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0]));
|
|
|
|
return;
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_compressstoreu_epi64
  #define _mm512_mask_compressstoreu_epi64(base_addr, k, a) simde_mm512_mask_compressstoreu_epi64(base_addr, k, a)
#endif
|
|
|
|
SIMDE_FUNCTION_ATTRIBUTES
|
|
simde__m512i
|
|
simde_mm512_maskz_compress_epi64 (simde__mmask8 k, simde__m512i a) {
|
|
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
|
|
return _mm512_maskz_compress_epi64(k, a);
|
|
#else
|
|
simde__m512i_private
|
|
a_ = simde__m512i_to_private(a);
|
|
size_t ri = 0;
|
|
|
|
SIMDE_VECTORIZE
|
|
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
if ((k >> i) & 1) {
|
|
a_.i64[ri++] = a_.i64[i];
|
|
}
|
|
}
|
|
|
|
for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])); ri++) {
|
|
a_.i64[ri] = INT64_C(0);
|
|
}
|
|
|
|
return simde__m512i_from_private(a_);
|
|
#endif
|
|
}
|
|
/* Native alias: forward the Intel name to the portable simde_ implementation
 * (the previous definition expanded to itself). */
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_compress_epi64
  #define _mm512_maskz_compress_epi64(k, a) simde_mm512_maskz_compress_epi64(k, a)
#endif
|
|
|
|
SIMDE_END_DECLS_
|
|
HEDLEY_DIAGNOSTIC_POP
|
|
|
|
#endif /* !defined(SIMDE_X86_AVX512_COMPRESS_H) */
|