Add simde

Benau
2022-04-29 11:02:25 +08:00
parent 0f2b3da37e
commit 383bd93261
350 changed files with 187154 additions and 0 deletions

@@ -0,0 +1,159 @@
/* ==========================================================================
* Copyright (c) 2022 SuperTuxKart-Team
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to permit
* persons to whom the Software is furnished to do so, subject to the
* following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
* NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
* ==========================================================================
*/
#ifndef HEADER_SIMD_WRAPPER_HPP
#define HEADER_SIMD_WRAPPER_HPP
#include <simde/simde-arch.h>
// <cstddef> and <cstdlib> provide size_t, malloc and free for the
// aligned-allocation helpers below
#include <cstddef>
#include <cstdlib>
#if defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_X86)
// Native SSE
#if __MMX__ || CPU_ENABLE_MMX
#include <mmintrin.h>
#define CPU_MMX_SUPPORT (1)
#endif
#if __SSE__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 1 ) ) || CPU_ENABLE_SSE
#include <xmmintrin.h>
#define CPU_SSE_SUPPORT (1)
#endif
#if __SSE2__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 2 ) ) || CPU_ENABLE_SSE2
#include <emmintrin.h>
#define CPU_SSE2_SUPPORT (1)
#endif
#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
#include <pmmintrin.h>
#define CPU_SSE3_SUPPORT (1)
#endif
#if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3
#include <tmmintrin.h>
#define CPU_SSSE3_SUPPORT (1)
#endif
#if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1
#include <smmintrin.h>
#define CPU_SSE4_1_SUPPORT (1)
#endif
#if __SSE4_2__ || CPU_ENABLE_SSE4_2
#include <nmmintrin.h>
#define CPU_SSE4_2_SUPPORT (1)
#endif
#elif defined(SIMDE_ARCH_ARM_NEON)
// We only enable compile-time SSE* to NEON translation for now because it's
// easy to test. Enable up to SSE4.2 because beyond that (starting from AVX)
// SIMDe has few native conversions and would use the slower C99 fallbacks.
#define CPU_MMX_SUPPORT (1)
#define CPU_SSE_SUPPORT (1)
#define CPU_SSE2_SUPPORT (1)
#define CPU_SSE3_SUPPORT (1)
#define CPU_SSSE3_SUPPORT (1)
#define CPU_SSE4_1_SUPPORT (1)
#define CPU_SSE4_2_SUPPORT (1)
#if defined(_MSC_VER) && defined(__cplusplus)
// Fix math-related functions missing in MSVC
#include <cmath>
#endif
#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/sse4.2.h"
#endif
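// With SIMDE_ENABLE_NATIVE_ALIASES defined above, x86 intrinsic code compiles
// unchanged on ARM; an illustrative sketch (not part of this header):
//   __m128 a = _mm_set1_ps(1.0f); // backed by a NEON float32x4 under the hood
//   __m128 b = _mm_add_ps(a, a);  // maps to vaddq_f32 on NEON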
#ifndef _MM_FROUND_TO_NEG_INF
#define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
#endif
#ifndef _MM_FROUND_NO_EXC
#define _MM_FROUND_NO_EXC SIMDE_MM_FROUND_NO_EXC
#endif
#ifndef _MM_SET_ROUNDING_MODE
#define _MM_SET_ROUNDING_MODE SIMDE_MM_SET_ROUNDING_MODE
#endif
#ifndef _MM_ROUND_NEAREST
#define _MM_ROUND_NEAREST SIMDE_MM_ROUND_NEAREST
#endif
#ifndef _MM_ROUND_UP
#define _MM_ROUND_UP SIMDE_MM_ROUND_UP
#endif
#ifndef _MM_ROUND_DOWN
#define _MM_ROUND_DOWN SIMDE_MM_ROUND_DOWN
#endif
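// Illustrative use of the rounding-mode shims above (hedged example):
//   _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); // resolves to SIMDe's
//   implementation on targets without native SSE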
// Utilities for aligned allocation
inline void* simd_aligned_alloc(size_t alignment, size_t bytes)
{
// we need to allocate enough storage for the requested bytes, some
// book-keeping (to store the location returned by malloc) and some extra
// padding to allow us to find an aligned byte. 2 * alignment is more than
// enough: the worst-case offset past the book-keeping is exactly alignment bytes.
const size_t total_size = bytes + (2 * alignment) + sizeof(size_t);
// use malloc to allocate the memory.
char* data = (char*)malloc(total_size);
if (data)
{
// store the original start of the malloc'd data.
const void* const data_start = data;
// dedicate enough space to the book-keeping.
data += sizeof(size_t);
// find a memory location with correct alignment. the alignment minus
// the remainder of this mod operation is how many bytes forward we need
// to move to find an aligned byte.
const size_t offset = alignment - (((size_t)data) % alignment);
// set data to the aligned memory.
data += offset;
// write the book-keeping.
size_t* book_keeping = (size_t*)(data - sizeof(size_t));
*book_keeping = (size_t)data_start;
}
return data;
}
inline void simd_aligned_free(void* raw_data)
{
if (raw_data)
{
char* data = (char*)raw_data;
// we have to assume this memory was allocated with simd_aligned_alloc.
// this means the sizeof(size_t) bytes before data are the book-keeping
// which points to the location we need to pass to free.
data -= sizeof(size_t);
// set data to the location stored in book-keeping.
data = (char*)(*((size_t*)data));
// free the memory.
free(data);
}
}
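// Usage sketch for the helpers above (illustrative; the 16-byte alignment is
// an arbitrary example value, not a requirement of this header):
//   float* buf = (float*)simd_aligned_alloc(16, 64 * sizeof(float));
//   if (buf)
//   {
//       // ... aligned loads/stores on buf ...
//       simd_aligned_free(buf);
//   }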
#endif

@@ -0,0 +1,20 @@
Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@@ -0,0 +1,10 @@
# SIMDe Without Test Cases
This repository contains only the core of
[SIMDe](https://github.com/simd-everywhere/simde).
It is generated automatically for every commit to master, and is
intended to be used as a submodule in projects which don't want to
include the (rather large) test cases.
All development work happens in the main repository; please do not
file issues or create pull requests against this repository.

@@ -0,0 +1,210 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_H)
#define SIMDE_ARM_NEON_H
#include "neon/types.h"
#include "neon/aba.h"
#include "neon/abd.h"
#include "neon/abdl.h"
#include "neon/abs.h"
#include "neon/add.h"
#include "neon/addhn.h"
#include "neon/addl.h"
#include "neon/addlv.h"
#include "neon/addl_high.h"
#include "neon/addv.h"
#include "neon/addw.h"
#include "neon/addw_high.h"
#include "neon/and.h"
#include "neon/bcax.h"
#include "neon/bic.h"
#include "neon/bsl.h"
#include "neon/cage.h"
#include "neon/cagt.h"
#include "neon/ceq.h"
#include "neon/ceqz.h"
#include "neon/cge.h"
#include "neon/cgez.h"
#include "neon/cgt.h"
#include "neon/cgtz.h"
#include "neon/cle.h"
#include "neon/clez.h"
#include "neon/cls.h"
#include "neon/clt.h"
#include "neon/cltz.h"
#include "neon/clz.h"
#include "neon/cmla.h"
#include "neon/cmla_rot90.h"
#include "neon/cmla_rot180.h"
#include "neon/cmla_rot270.h"
#include "neon/cnt.h"
#include "neon/cvt.h"
#include "neon/combine.h"
#include "neon/create.h"
#include "neon/dot.h"
#include "neon/dot_lane.h"
#include "neon/dup_lane.h"
#include "neon/dup_n.h"
#include "neon/eor.h"
#include "neon/ext.h"
#include "neon/fma.h"
#include "neon/fma_lane.h"
#include "neon/fma_n.h"
#include "neon/get_high.h"
#include "neon/get_lane.h"
#include "neon/get_low.h"
#include "neon/hadd.h"
#include "neon/hsub.h"
#include "neon/ld1.h"
#include "neon/ld1_dup.h"
#include "neon/ld1_lane.h"
#include "neon/ld2.h"
#include "neon/ld3.h"
#include "neon/ld4.h"
#include "neon/ld4_lane.h"
#include "neon/max.h"
#include "neon/maxnm.h"
#include "neon/maxv.h"
#include "neon/min.h"
#include "neon/minnm.h"
#include "neon/minv.h"
#include "neon/mla.h"
#include "neon/mla_n.h"
#include "neon/mlal.h"
#include "neon/mlal_high.h"
#include "neon/mlal_high_n.h"
#include "neon/mlal_lane.h"
#include "neon/mlal_n.h"
#include "neon/mls.h"
#include "neon/mls_n.h"
#include "neon/mlsl.h"
#include "neon/mlsl_high.h"
#include "neon/mlsl_high_n.h"
#include "neon/mlsl_lane.h"
#include "neon/mlsl_n.h"
#include "neon/movl.h"
#include "neon/movl_high.h"
#include "neon/movn.h"
#include "neon/movn_high.h"
#include "neon/mul.h"
#include "neon/mul_lane.h"
#include "neon/mul_n.h"
#include "neon/mull.h"
#include "neon/mull_high.h"
#include "neon/mull_lane.h"
#include "neon/mull_n.h"
#include "neon/mvn.h"
#include "neon/neg.h"
#include "neon/orn.h"
#include "neon/orr.h"
#include "neon/padal.h"
#include "neon/padd.h"
#include "neon/paddl.h"
#include "neon/pmax.h"
#include "neon/pmin.h"
#include "neon/qabs.h"
#include "neon/qadd.h"
#include "neon/qdmulh.h"
#include "neon/qdmulh_lane.h"
#include "neon/qdmulh_n.h"
#include "neon/qdmull.h"
#include "neon/qrdmulh.h"
#include "neon/qrdmulh_lane.h"
#include "neon/qrdmulh_n.h"
#include "neon/qrshrn_n.h"
#include "neon/qrshrun_n.h"
#include "neon/qmovn.h"
#include "neon/qmovun.h"
#include "neon/qmovn_high.h"
#include "neon/qneg.h"
#include "neon/qsub.h"
#include "neon/qshl.h"
#include "neon/qshlu_n.h"
#include "neon/qshrn_n.h"
#include "neon/qshrun_n.h"
#include "neon/qtbl.h"
#include "neon/qtbx.h"
#include "neon/rbit.h"
#include "neon/recpe.h"
#include "neon/recps.h"
#include "neon/reinterpret.h"
#include "neon/rev16.h"
#include "neon/rev32.h"
#include "neon/rev64.h"
#include "neon/rhadd.h"
#include "neon/rnd.h"
#include "neon/rndm.h"
#include "neon/rndi.h"
#include "neon/rndn.h"
#include "neon/rndp.h"
#include "neon/rshl.h"
#include "neon/rshr_n.h"
#include "neon/rshrn_n.h"
#include "neon/rsqrte.h"
#include "neon/rsqrts.h"
#include "neon/rsra_n.h"
#include "neon/set_lane.h"
#include "neon/shl.h"
#include "neon/shl_n.h"
#include "neon/shll_n.h"
#include "neon/shr_n.h"
#include "neon/shrn_n.h"
#include "neon/sqadd.h"
#include "neon/sra_n.h"
#include "neon/sri_n.h"
#include "neon/st1.h"
#include "neon/st1_lane.h"
#include "neon/st2.h"
#include "neon/st2_lane.h"
#include "neon/st3.h"
#include "neon/st3_lane.h"
#include "neon/st4.h"
#include "neon/st4_lane.h"
#include "neon/sub.h"
#include "neon/subhn.h"
#include "neon/subl.h"
#include "neon/subl_high.h"
#include "neon/subw.h"
#include "neon/subw_high.h"
#include "neon/tbl.h"
#include "neon/tbx.h"
#include "neon/trn.h"
#include "neon/trn1.h"
#include "neon/trn2.h"
#include "neon/tst.h"
#include "neon/uqadd.h"
#include "neon/uzp.h"
#include "neon/uzp1.h"
#include "neon/uzp2.h"
#include "neon/xar.h"
#include "neon/zip.h"
#include "neon/zip1.h"
#include "neon/zip2.h"
#endif /* SIMDE_ARM_NEON_H */

@@ -0,0 +1,208 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ABA_H)
#define SIMDE_ARM_NEON_ABA_H
#include "abd.h"
#include "add.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_s8(a, b, c);
#else
return simde_vadd_s8(simde_vabd_s8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_s8
#define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c))
#endif
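// Worked lane example: vaba_s8 computes a[i] + |b[i] - c[i]|, so with
// a[0] = 1, b[0] = 5, c[0] = 9 the result lane is 1 + |5 - 9| = 5.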
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_s16(a, b, c);
#else
return simde_vadd_s16(simde_vabd_s16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_s16
#define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_s32(a, b, c);
#else
return simde_vadd_s32(simde_vabd_s32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_s32
#define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_u8(a, b, c);
#else
return simde_vadd_u8(simde_vabd_u8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_u8
#define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_u16(a, b, c);
#else
return simde_vadd_u16(simde_vabd_u16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_u16
#define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaba_u32(a, b, c);
#else
return simde_vadd_u32(simde_vabd_u32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaba_u32
#define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_s8(a, b, c);
#else
return simde_vaddq_s8(simde_vabdq_s8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_s8
#define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_s16(a, b, c);
#else
return simde_vaddq_s16(simde_vabdq_s16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_s16
#define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_s32(a, b, c);
#else
return simde_vaddq_s32(simde_vabdq_s32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_s32
#define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_u8(a, b, c);
#else
return simde_vaddq_u8(simde_vabdq_u8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_u8
#define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_u16(a, b, c);
#else
return simde_vaddq_u16(simde_vabdq_u16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_u16
#define vabaq_u16(a, b, c) simde_vabaq_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabaq_u32(a, b, c);
#else
return simde_vaddq_u32(simde_vabdq_u32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabaq_u32
#define vabaq_u32(a, b, c) simde_vabaq_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ABA_H) */

@@ -0,0 +1,489 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ABD_H)
#define SIMDE_ARM_NEON_ABD_H
#include "abs.h"
#include "subl.h"
#include "movn.h"
#include "movl.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vabds_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabds_f32(a, b);
#else
simde_float32_t r = a - b;
return r < 0 ? -r : r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabds_f32
#define vabds_f32(a, b) simde_vabds_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vabdd_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabdd_f64(a, b);
#else
simde_float64_t r = a - b;
return r < 0 ? -r : r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabdd_f64
#define vabdd_f64(a, b) simde_vabdd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_f32(a, b);
#else
return simde_vabs_f32(simde_vsub_f32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_f32
#define vabd_f32(a, b) simde_vabd_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabd_f64(a, b);
#else
return simde_vabs_f64(simde_vsub_f64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabd_f64
#define vabd_f64(a, b) simde_vabd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_s8(a, b);
#elif defined(SIMDE_X86_MMX_NATIVE)
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
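// branchless |a - b|: m is all-ones (-1) exactly where b > a, and
// ((a - b) + m) ^ m negates those lanes, since (x - 1) ^ -1 == -x
// in two's complement.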
const __m64 m = _mm_cmpgt_pi8(b_.m64, a_.m64);
r_.m64 =
_mm_xor_si64(
_mm_add_pi8(
_mm_sub_pi8(a_.m64, b_.m64),
m
),
m
);
return simde_int8x8_from_private(r_);
#else
return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b)));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_s8
#define vabd_s8(a, b) simde_vabd_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_s16(a, b);
#elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
r_.m64 = _mm_sub_pi16(_mm_max_pi16(a_.m64, b_.m64), _mm_min_pi16(a_.m64, b_.m64));
return simde_int16x4_from_private(r_);
#else
return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b)));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_s16
#define vabd_s16(a, b) simde_vabd_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_s32(a, b);
#else
return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b)));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_s32
#define vabd_s32(a, b) simde_vabd_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_u8(a, b);
#else
return simde_vmovn_u16(
simde_vreinterpretq_u16_s16(
simde_vabsq_s16(
simde_vsubq_s16(
simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))))));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_u8
#define vabd_u8(a, b) simde_vabd_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_u16(a, b);
#else
return simde_vmovn_u32(
simde_vreinterpretq_u32_s32(
simde_vabsq_s32(
simde_vsubq_s32(
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))))));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_u16
#define vabd_u16(a, b) simde_vabd_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabd_u32(a, b);
#else
return simde_vmovn_u64(
simde_vreinterpretq_u64_s64(
simde_vabsq_s64(
simde_vsubq_s64(
simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))))));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabd_u32
#define vabd_u32(a, b) simde_vabd_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_f32(a, b);
#else
return simde_vabsq_f32(simde_vsubq_f32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_f32
#define vabdq_f32(a, b) simde_vabdq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabdq_f64(a, b);
#else
return simde_vabsq_f64(simde_vsubq_f64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabdq_f64
#define vabdq_f64(a, b) simde_vabdq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_sub_epi8(_mm_max_epi8(a_.m128i, b_.m128i), _mm_min_epi8(a_.m128i, b_.m128i));
#elif defined(SIMDE_X86_SSE2_NATIVE)
const __m128i m = _mm_cmpgt_epi8(b_.m128i, a_.m128i);
r_.m128i =
_mm_xor_si128(
_mm_add_epi8(
_mm_sub_epi8(a_.m128i, b_.m128i),
m
),
m
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_sub(wasm_i8x16_max(a_.v128, b_.v128), wasm_i8x16_min(a_.v128, b_.v128));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_s8
#define vabdq_s8(a, b) simde_vabdq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881658604 */
r_.m128i = _mm_sub_epi16(_mm_max_epi16(a_.m128i, b_.m128i), _mm_min_epi16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_sub(wasm_i16x8_max(a_.v128, b_.v128), wasm_i16x8_min(a_.v128, b_.v128));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] =
(a_.values[i] < b_.values[i]) ?
(b_.values[i] - a_.values[i]) :
(a_.values[i] - b_.values[i]);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_s16
#define vabdq_s16(a, b) simde_vabdq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_sub_epi32(_mm_max_epi32(a_.m128i, b_.m128i), _mm_min_epi32(a_.m128i, b_.m128i));
#elif defined(SIMDE_X86_SSE2_NATIVE)
const __m128i m = _mm_cmpgt_epi32(b_.m128i, a_.m128i);
r_.m128i =
_mm_xor_si128(
_mm_add_epi32(
_mm_sub_epi32(a_.m128i, b_.m128i),
m
),
m
);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? -tmp : tmp);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_s32
#define vabdq_s32(a, b) simde_vabdq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
return vec_absd(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_sub_epi8(_mm_max_epu8(a_.m128i, b_.m128i), _mm_min_epu8(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_sub(wasm_u8x16_max(a_.v128, b_.v128), wasm_u8x16_min(a_.v128, b_.v128));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? -tmp : tmp);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_u8
#define vabdq_u8(a, b) simde_vabdq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
return vec_absd(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_sub_epi16(_mm_max_epu16(a_.m128i, b_.m128i), _mm_min_epu16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_sub(wasm_u16x8_max(a_.v128, b_.v128), wasm_u16x8_min(a_.v128, b_.v128));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? -tmp : tmp);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_u16
#define vabdq_u16(a, b) simde_vabdq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
return vec_absd(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_sub(vec_max(a, b), vec_min(a, b));
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b) - vec_min(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_sub_epi32(_mm_max_epu32(a_.m128i, b_.m128i), _mm_min_epu32(a_.m128i, b_.m128i));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? -tmp : tmp);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdq_u32
#define vabdq_u32(a, b) simde_vabdq_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ABD_H) */

@@ -0,0 +1,147 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ABDL_H)
#define SIMDE_ARM_NEON_ABDL_H
#include "abs.h"
#include "subl.h"
#include "movl.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabdl_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_s8(a, b);
#else
return simde_vabsq_s16(simde_vsubl_s8(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_s8
#define vabdl_s8(a, b) simde_vabdl_s8((a), (b))
#endif
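// Note the widening: |a - b| of two int8 lanes can reach 255 (e.g.
// |(-128) - 127|), which only fits in the int16 result lanes; the same
// reasoning applies to the wider variants below.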
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabdl_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_s16(a, b);
#else
return simde_vabsq_s32(simde_vsubl_s16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_s16
#define vabdl_s16(a, b) simde_vabdl_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabdl_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_s32(a, b);
#else
return simde_vabsq_s64(simde_vsubl_s32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_s32
#define vabdl_s32(a, b) simde_vabdl_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabdl_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_u8(a, b);
#else
return simde_vreinterpretq_u16_s16(
simde_vabsq_s16(
simde_vsubq_s16(
simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))
)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_u8
#define vabdl_u8(a, b) simde_vabdl_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabdl_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_u16(a, b);
#else
return simde_vreinterpretq_u32_s32(
simde_vabsq_s32(
simde_vsubq_s32(
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))
)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_u16
#define vabdl_u16(a, b) simde_vabdl_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vabdl_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabdl_u32(a, b);
#else
return simde_vreinterpretq_u64_s64(
simde_vabsq_s64(
simde_vsubq_s64(
simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))
)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabdl_u32
#define vabdl_u32(a, b) simde_vabdl_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ABDL_H) */

@@ -0,0 +1,431 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ABS_H)
#define SIMDE_ARM_NEON_ABS_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vabsd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0))
return vabsd_s64(a);
#else
return a < 0 ? -a : a;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabsd_s64
#define vabsd_s64(a) simde_vabsd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vabs_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabs_f32(a);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabs_f32
#define vabs_f32(a) simde_vabs_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vabs_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabs_f64(a);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabs_f64
#define vabs_f64(a) simde_vabs_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vabs_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabs_s8(a);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_abs_pi8(a_.m64);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
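// branchless abs: m is all-ones in lanes where a < 0, so
// (-a.values & m) | (a.values & ~m) selects the negated value exactly there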
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabs_s8
#define vabs_s8(a) simde_vabs_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vabs_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabs_s16(a);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_abs_pi16(a_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabs_s16
#define vabs_s16(a) simde_vabs_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vabs_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabs_s32(a);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_abs_pi32(a_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabs_s32
#define vabs_s32(a) simde_vabs_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vabs_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabs_s64(a);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabs_s64
#define vabs_s64(a) simde_vabs_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vabsq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabsq_f32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_abs(a);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_abs(a_.v128);
#elif defined(SIMDE_X86_SSE_NATIVE)
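// |x| for IEEE-754 floats: AND with 0x7FFFFFFF clears the sign bit and
// leaves the magnitude untouched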
simde_float32 mask_;
uint32_t u32_ = UINT32_C(0x7FFFFFFF);
simde_memcpy(&mask_, &u32_, sizeof(u32_));
r_.m128 = _mm_and_ps(_mm_set1_ps(mask_), a_.m128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_math_fabsf(a_.values[i]);
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabsq_f32
#define vabsq_f32(a) simde_vabsq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vabsq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabsq_f64(a);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_abs(a);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE)
simde_float64 mask_;
uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
simde_memcpy(&mask_, &u64_, sizeof(u64_));
r_.m128d = _mm_and_pd(_mm_set1_pd(mask_), a_.m128d);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_math_fabs(a_.values[i]);
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vabsq_f64
#define vabsq_f64(a) simde_vabsq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabsq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabsq_s8(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_abs(a);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE)
r_.m128i = _mm_abs_epi8(a_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_min_epu8(a_.m128i, _mm_sub_epi8(_mm_setzero_si128(), a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_abs(a_.v128);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabsq_s8
#define vabsq_s8(a) simde_vabsq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabsq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabsq_s16(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_abs(a);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE)
r_.m128i = _mm_abs_epi16(a_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_max_epi16(a_.m128i, _mm_sub_epi16(_mm_setzero_si128(), a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_abs(a_.v128);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabsq_s16
#define vabsq_s16(a) simde_vabsq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabsq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vabsq_s32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_abs(a);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a);
#if defined(SIMDE_X86_SSSE3_NATIVE)
r_.m128i = _mm_abs_epi32(a_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a_.m128i);
r_.m128i = _mm_sub_epi32(_mm_xor_si128(a_.m128i, m), m);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_abs(a_.v128);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabsq_s32
#define vabsq_s32(a) simde_vabsq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabsq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vabsq_s64(a);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)
return vec_abs(a);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_abs_epi64(a_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
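// per-lane 64-bit sign mask: shuffle 0xF5 duplicates each lane's high 32
// bits, and the arithmetic shift by 31 smears the sign across the lane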
const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a_.m128i, 0xF5), 31);
r_.m128i = _mm_sub_epi64(_mm_xor_si128(a_.m128i, m), m);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_abs(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
r_.values = (-a_.values & m) | (a_.values & ~m);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vabsq_s64
#define vabsq_s64(a) simde_vabsq_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ABS_H) */

@@ -0,0 +1,744 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ADD_H)
#define SIMDE_ARM_NEON_ADD_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float16
simde_vaddh_f16(simde_float16 a, simde_float16 b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vaddh_f16(a, b);
#else
simde_float32 af = simde_float16_to_float32(a);
simde_float32 bf = simde_float16_to_float32(b);
return simde_float16_from_float32(af + bf);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vaddh_f16
#define vaddh_f16(a, b) simde_vaddh_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddd_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddd_s64(a, b);
#else
return a + b;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddd_s64
#define vaddd_s64(a, b) simde_vaddd_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddd_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddd_u64(a, b);
#else
return a + b;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddd_u64
#define vaddd_u64(a, b) simde_vaddd_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vadd_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vadd_f16(a, b);
#else
simde_float16x4_private
r_,
a_ = simde_float16x4_to_private(a),
b_ = simde_float16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]);
}
return simde_float16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vadd_f16
#define vadd_f16(a, b) simde_vadd_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_f32(a, b);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
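// GCC/Clang vector extensions: element-wise '+' on the whole vector lets
// the compiler choose the best instructions for the target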
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_f32
#define vadd_f32(a, b) simde_vadd_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vadd_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vadd_f64(a, b);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vadd_f64
#define vadd_f64(a, b) simde_vadd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_s8(a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#elif defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_add_pi8(a_.m64, b_.m64);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_s8
#define vadd_s8(a, b) simde_vadd_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_s16(a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#elif defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_add_pi16(a_.m64, b_.m64);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_s16
#define vadd_s16(a, b) simde_vadd_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_s32(a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#elif defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_add_pi32(a_.m64, b_.m64);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_s32
#define vadd_s32(a, b) simde_vadd_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vadd_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_s64(a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_s64
#define vadd_s64(a, b) simde_vadd_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_u8
#define vadd_u8(a, b) simde_vadd_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_u16
#define vadd_u16(a, b) simde_vadd_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_u32
#define vadd_u32(a, b) simde_vadd_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vadd_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vadd_u64
#define vadd_u64(a, b) simde_vadd_u64((a), (b))
#endif
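/* The 128-bit (quad, "vaddq_*") variants follow. Each has the same shape as
 * the 64-bit versions above: use the native NEON instruction when available,
 * otherwise fall back to an SSE/WASM/AltiVec equivalent or a scalar loop. */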
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vaddq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vaddq_f16(a, b);
#else
simde_float16x8_private
r_,
a_ = simde_float16x8_to_private(a),
b_ = simde_float16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]);
}
return simde_float16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vaddq_f16
#define vaddq_f16(a, b) simde_vaddq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vaddq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
SIMDE_POWER_ALTIVEC_VECTOR(float) a_, b_, r_;
a_ = a;
b_ = b;
r_ = vec_add(a_, b_);
return r_;
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE)
r_.m128 = _mm_add_ps(a_.m128, b_.m128);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_f32
#define vaddq_f32(a, b) simde_vaddq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vaddq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_add(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128d = _mm_add_pd(a_.m128d, b_.m128d);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddq_f64
#define vaddq_f64(a, b) simde_vaddq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_add_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_s8
#define vaddq_s8(a, b) simde_vaddq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_add_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_s16
#define vaddq_s16(a, b) simde_vaddq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_add_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_s32
#define vaddq_s32(a, b) simde_vaddq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_add(a, b);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_add_epi64(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_add(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_s64
#define vaddq_s64(a, b) simde_vaddq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_u8
#define vaddq_u8(a, b) simde_vaddq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_u16
#define vaddq_u16(a, b) simde_vaddq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_add(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_u32
#define vaddq_u32(a, b) simde_vaddq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_add(a, b);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values + b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddq_u64
#define vaddq_u64(a, b) simde_vaddq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
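/* Example usage, as a sketch: assuming this header is reached through
 * simde's arm/neon.h and built for any supported target,
 *
 *   simde_int32x4_t a   = simde_vdupq_n_s32(1);
 *   simde_int32x4_t b   = simde_vdupq_n_s32(2);
 *   simde_int32x4_t sum = simde_vaddq_s32(a, b);   // every lane == 3
 *
 * the call dispatches to NEON, SSE2, WASM SIMD or the scalar loop above,
 * depending on the compile target. */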
#endif /* !defined(SIMDE_ARM_NEON_ADD_H) */


@@ -0,0 +1,211 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ADDHN_H)
#define SIMDE_ARM_NEON_ADDHN_H
#include "add.h"
#include "shr_n.h"
#include "movn.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
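/* vaddhn ("add returning high narrow") adds two quad vectors and keeps only
 * the high half of each sum, narrowing the element type: for s16 lanes,
 * r[i] = (int8_t) ((a[i] + b[i]) >> 8), so 0x1234 + 0x0101 = 0x1335 yields
 * the lane 0x13.  The generic fallback expresses exactly that as
 * vmovn(vshrq_n(vaddq(a, b), bits/2)); with __builtin_shufflevector the
 * high bytes of the raw sum are gathered directly instead (odd indices on
 * little-endian targets, even indices on big-endian ones). */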
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vaddhn_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_s16(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_int8x8_private r_;
simde_int8x16_private tmp_ =
simde_int8x16_to_private(
simde_vreinterpretq_s8_s16(
simde_vaddq_s16(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14);
#endif
return simde_int8x8_from_private(r_);
#else
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddq_s16(a, b), 8));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_s16
#define vaddhn_s16(a, b) simde_vaddhn_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vaddhn_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_s32(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_int16x4_private r_;
simde_int16x8_private tmp_ =
simde_int16x8_to_private(
simde_vreinterpretq_s16_s32(
simde_vaddq_s32(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6);
#endif
return simde_int16x4_from_private(r_);
#else
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddq_s32(a, b), 16));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_s32
#define vaddhn_s32(a, b) simde_vaddhn_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vaddhn_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_s64(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_int32x2_private r_;
simde_int32x4_private tmp_ =
simde_int32x4_to_private(
simde_vreinterpretq_s32_s64(
simde_vaddq_s64(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2);
#endif
return simde_int32x2_from_private(r_);
#else
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddq_s64(a, b), 32));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_s64
#define vaddhn_s64(a, b) simde_vaddhn_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vaddhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_u16(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_uint8x8_private r_;
simde_uint8x16_private tmp_ =
simde_uint8x16_to_private(
simde_vreinterpretq_u8_u16(
simde_vaddq_u16(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14);
#endif
return simde_uint8x8_from_private(r_);
#else
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddq_u16(a, b), 8));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_u16
#define vaddhn_u16(a, b) simde_vaddhn_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vaddhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_u32(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_uint16x4_private r_;
simde_uint16x8_private tmp_ =
simde_uint16x8_to_private(
simde_vreinterpretq_u16_u32(
simde_vaddq_u32(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6);
#endif
return simde_uint16x4_from_private(r_);
#else
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddq_u32(a, b), 16));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_u32
#define vaddhn_u32(a, b) simde_vaddhn_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vaddhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddhn_u64(a, b);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
simde_uint32x2_private r_;
simde_uint32x4_private tmp_ =
simde_uint32x4_to_private(
simde_vreinterpretq_u32_u64(
simde_vaddq_u64(a, b)
)
);
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3);
#else
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2);
#endif
return simde_uint32x2_from_private(r_);
#else
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddq_u64(a, b), 32));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddhn_u64
#define vaddhn_u64(a, b) simde_vaddhn_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDHN_H) */


@@ -0,0 +1,127 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_ADDL_H)
#define SIMDE_ARM_NEON_ADDL_H
#include "add.h"
#include "movl.h"
#include "movl_high.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
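/* vaddl ("add long") widens two 64-bit vectors to double-width lanes before
 * adding, so the sum cannot overflow the input element type: vaddl_s8(a, b)
 * returns int16x8, where lanes of 100 + 100 correctly produce 200.  Every
 * fallback here is simply vaddq(vmovl(a), vmovl(b)). */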
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddl_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_s8(a, b);
#else
return simde_vaddq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_s8
#define vaddl_s8(a, b) simde_vaddl_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddl_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_s16(a, b);
#else
return simde_vaddq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_s16
#define vaddl_s16(a, b) simde_vaddl_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddl_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_s32(a, b);
#else
return simde_vaddq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_s32
#define vaddl_s32(a, b) simde_vaddl_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddl_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_u8(a, b);
#else
return simde_vaddq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_u8
#define vaddl_u8(a, b) simde_vaddl_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddl_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_u16(a, b);
#else
return simde_vaddq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_u16
#define vaddl_u16(a, b) simde_vaddl_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddl_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddl_u32(a, b);
#else
return simde_vaddq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddl_u32
#define vaddl_u32(a, b) simde_vaddl_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDL_H) */


@@ -0,0 +1,127 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_ADDL_HIGH_H)
#define SIMDE_ARM_NEON_ADDL_HIGH_H
#include "add.h"
#include "movl.h"
#include "movl_high.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
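/* vaddl_high is the AArch64-only counterpart of vaddl: it widens and adds
 * the *high* halves of two quad vectors, e.g. lanes 8..15 of each int8x16.
 * The fallback composes vaddq with vmovl_high. */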
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddl_high_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_s8(a, b);
#else
return simde_vaddq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_s8
#define vaddl_high_s8(a, b) simde_vaddl_high_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddl_high_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_s16(a, b);
#else
return simde_vaddq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_s16
#define vaddl_high_s16(a, b) simde_vaddl_high_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddl_high_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_s32(a, b);
#else
return simde_vaddq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_s32
#define vaddl_high_s32(a, b) simde_vaddl_high_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_u8(a, b);
#else
return simde_vaddq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_u8
#define vaddl_high_u8(a, b) simde_vaddl_high_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_u16(a, b);
#else
return simde_vaddq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_u16
#define vaddl_high_u16(a, b) simde_vaddl_high_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddl_high_u32(a, b);
#else
return simde_vaddq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddl_high_u32
#define vaddl_high_u32(a, b) simde_vaddl_high_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) */


@@ -0,0 +1,317 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ADDLV_H)
#define SIMDE_ARM_NEON_ADDLV_H
#include "types.h"
#include "movl.h"
#include "addv.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
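/* vaddlv ("add long across vector") sums every lane into one scalar of twice
 * the element width, so e.g. vaddlv_u8 of eight lanes of 255 is 2040, which
 * still fits in the uint16_t result.  For the 64-bit variants the fallback
 * widens with vmovl and reuses vaddvq when a 128-bit vector ISA is present;
 * the 128-bit (vaddlvq) variants reduce with a plain loop. */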
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddlv_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_s16(simde_vmovl_s8(a));
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
int16_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_s8
#define vaddlv_s8(a) simde_vaddlv_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddlv_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_s32(simde_vmovl_s16(a));
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
int32_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_s16
#define vaddlv_s16(a) simde_vaddlv_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddlv_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_s64(simde_vmovl_s32(a));
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
int64_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_s32
#define vaddlv_s32(a) simde_vaddlv_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddlv_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_u8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_u16(simde_vmovl_u8(a));
#else
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
uint16_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_u8
#define vaddlv_u8(a) simde_vaddlv_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddlv_u16(simde_uint16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_u16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_u32(simde_vmovl_u16(a));
#else
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
uint32_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_u16
#define vaddlv_u16(a) simde_vaddlv_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddlv_u32(simde_uint32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlv_u32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddvq_u64(simde_vmovl_u32(a));
#else
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
uint64_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlv_u32
#define vaddlv_u32(a) simde_vaddlv_u32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddlvq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_s8(a);
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
int16_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_s8
#define vaddlvq_s8(a) simde_vaddlvq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddlvq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_s16(a);
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
int32_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_s16
#define vaddlvq_s16(a) simde_vaddlvq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddlvq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_s32(a);
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
int64_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_s32
#define vaddlvq_s32(a) simde_vaddlvq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddlvq_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_u8(a);
#else
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
uint16_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_u8
#define vaddlvq_u8(a) simde_vaddlvq_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddlvq_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_u16(a);
#else
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
uint32_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_u16
#define vaddlvq_u16(a) simde_vaddlvq_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddlvq_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddlvq_u32(a);
#else
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
uint64_t r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddlvq_u32
#define vaddlvq_u32(a) simde_vaddlvq_u32(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */


@@ -0,0 +1,447 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ADDV_H)
#define SIMDE_ARM_NEON_ADDV_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
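/* vaddv ("add across vector") reduces all lanes to a single scalar of the
 * same element type, so integer results can wrap: vaddvq_u8 of sixteen lanes
 * of 16 is 256, which wraps to 0 in the uint8_t result.  Only AArch64 has
 * this as a native instruction; everywhere else a reduction loop is used. */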
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vaddv_f32(simde_float32x2_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_f32(a);
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_f32
#define vaddv_f32(v) simde_vaddv_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vaddv_s8(simde_int8x8_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_s8(a);
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_s8
#define vaddv_s8(v) simde_vaddv_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddv_s16(simde_int16x4_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_s16(a);
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_s16
#define vaddv_s16(v) simde_vaddv_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddv_s32(simde_int32x2_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_s32(a);
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_s32
#define vaddv_s32(v) simde_vaddv_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vaddv_u8(simde_uint8x8_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_u8(a);
#else
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_u8
#define vaddv_u8(v) simde_vaddv_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddv_u16(simde_uint16x4_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_u16(a);
#else
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_u16
#define vaddv_u16(v) simde_vaddv_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddv_u32(simde_uint32x2_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddv_u32(a);
#else
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddv_u32
#define vaddv_u32(v) simde_vaddv_u32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vaddvq_f32(simde_float32x4_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_f32(a);
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_f32
#define vaddvq_f32(v) simde_vaddvq_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vaddvq_f64(simde_float64x2_t a) {
simde_float64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_f64(a);
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_f64
#define vaddvq_f64(v) simde_vaddvq_f64(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vaddvq_s8(simde_int8x16_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_s8(a);
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_s8
#define vaddvq_s8(v) simde_vaddvq_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddvq_s16(simde_int16x8_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_s16(a);
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_s16
#define vaddvq_s16(v) simde_vaddvq_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddvq_s32(simde_int32x4_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_s32(a);
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_s32
#define vaddvq_s32(v) simde_vaddvq_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddvq_s64(simde_int64x2_t a) {
int64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_s64(a);
#else
simde_int64x2_private a_ = simde_int64x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_s64
#define vaddvq_s64(v) simde_vaddvq_s64(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vaddvq_u8(simde_uint8x16_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u8(a);
#else
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_u8
#define vaddvq_u8(v) simde_vaddvq_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddvq_u16(simde_uint16x8_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u16(a);
#else
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_u16
#define vaddvq_u16(v) simde_vaddvq_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddvq_u32(simde_uint32x4_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u32(a);
#else
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_u32
#define vaddvq_u32(v) simde_vaddvq_u32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddvq_u64(simde_uint64x2_t a) {
uint64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vaddvq_u64(a);
#else
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(+:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r += a_.values[i];
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddvq_u64
#define vaddvq_u64(v) simde_vaddvq_u64(v)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDV_H) */


@@ -0,0 +1,222 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_ADDW_H)
#define SIMDE_ARM_NEON_ADDW_H
#include "types.h"
#include "add.h"
#include "movl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
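/* vaddw ("add wide") adds a narrow 64-bit vector to an already-wide quad
 * vector by widening the narrow operand first: vaddw_s8(int16x8, int8x8)
 * returns int16x8.  The preferred fallback is vaddq(a, vmovl(b)); the last
 * resort converts b lane by lane and adds in a scalar loop. */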
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_s8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s16(a, simde_vmovl_s8(b));
#else
simde_int16x8_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(a);
simde_int8x8_private b_ = simde_int8x8_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_s8
#define vaddw_s8(a, b) simde_vaddw_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s32(a, simde_vmovl_s16(b));
#else
simde_int32x4_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(a);
simde_int16x4_private b_ = simde_int16x4_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_s16
#define vaddw_s16(a, b) simde_vaddw_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_s32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s64(a, simde_vmovl_s32(b));
#else
simde_int64x2_private r_;
simde_int64x2_private a_ = simde_int64x2_to_private(a);
simde_int32x2_private b_ = simde_int32x2_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_s32
#define vaddw_s32(a, b) simde_vaddw_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_u8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u16(a, simde_vmovl_u8(b));
#else
simde_uint16x8_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
simde_uint8x8_private b_ = simde_uint8x8_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_u8
#define vaddw_u8(a, b) simde_vaddw_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_u16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u32(a, simde_vmovl_u16(b));
#else
simde_uint32x4_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
simde_uint16x4_private b_ = simde_uint16x4_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_u16
#define vaddw_u16(a, b) simde_vaddw_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vaddw_u32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u64(a, simde_vmovl_u32(b));
#else
simde_uint64x2_private r_;
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
simde_uint32x2_private b_ = simde_uint32x2_to_private(b);
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
r_.values += a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vaddw_u32
#define vaddw_u32(a, b) simde_vaddw_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDW_H) */


@@ -0,0 +1,191 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H)
#define SIMDE_ARM_NEON_ADDW_HIGH_H
#include "types.h"
#include "movl_high.h"
#include "add.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
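/* vaddw_high is the AArch64-only variant of vaddw that widens the *high*
 * half of the narrow operand: vaddw_high_s8(a, b) adds lanes 8..15 of the
 * int8x16 b to the int16x8 a.  The scalar fallback indexes b at
 * i + lanes/2 to pick out that high half. */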
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_s8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s16(a, simde_vmovl_high_s8(b));
#else
simde_int16x8_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(a);
simde_int8x16_private b_ = simde_int8x16_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_s8
#define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s32(a, simde_vmovl_high_s16(b));
#else
simde_int32x4_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(a);
simde_int16x8_private b_ = simde_int16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_s16
#define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_s32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_s64(a, simde_vmovl_high_s32(b));
#else
simde_int64x2_private r_;
simde_int64x2_private a_ = simde_int64x2_to_private(a);
simde_int32x4_private b_ = simde_int32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_s32
#define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_u8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u16(a, simde_vmovl_high_u8(b));
#else
simde_uint16x8_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
simde_uint8x16_private b_ = simde_uint8x16_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_u8
#define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_u16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u32(a, simde_vmovl_high_u16(b));
#else
simde_uint32x4_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
simde_uint16x8_private b_ = simde_uint16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_u16
#define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddw_high_u32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vaddq_u64(a, simde_vmovl_high_u32(b));
#else
simde_uint64x2_private r_;
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
simde_uint32x4_private b_ = simde_uint32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
}
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vaddw_high_u32
#define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */


@@ -0,0 +1,552 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_AND_H)
#define SIMDE_ARM_NEON_AND_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vand_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_s8(a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_s8
#define vand_s8(a, b) simde_vand_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vand_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_s16(a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_s16
#define vand_s16(a, b) simde_vand_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vand_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_s32(a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_s32
#define vand_s32(a, b) simde_vand_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vand_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_s64(a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_s64
#define vand_s64(a, b) simde_vand_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vand_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_u8
#define vand_u8(a, b) simde_vand_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vand_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_u16
#define vand_u16(a, b) simde_vand_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vand_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_u32
#define vand_u32(a, b) simde_vand_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vand_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vand_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vand_u64
#define vand_u64(a, b) simde_vand_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vandq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_s8
#define vandq_s8(a, b) simde_vandq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vandq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_s16
#define vandq_s16(a, b) simde_vandq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vandq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_s32
#define vandq_s32(a, b) simde_vandq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vandq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_and(a, b);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_s64
#define vandq_s64(a, b) simde_vandq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vandq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_u8
#define vandq_u8(a, b) simde_vandq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vandq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_u16
#define vandq_u16(a, b) simde_vandq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vandq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_and(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_u32
#define vandq_u32(a, b) simde_vandq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vandq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vandq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_and(a, b);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values & b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vandq_u64
#define vandq_u64(a, b) simde_vandq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_AND_H) */
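/* Editor's note: a short usage sketch, not part of the vendored sources,
 * assuming <simde/arm/neon.h> is on the include path. The vand family is a
 * plain lane-wise AND; a typical use is masking bits in place. */
#include <simde/arm/neon.h>

/* Clear the low nibble of each of 16 bytes. */
static void clear_low_nibbles(uint8_t buf[16]) {
  simde_uint8x16_t v = simde_vld1q_u8(buf);
  simde_vst1q_u8(buf, simde_vandq_u8(v, simde_vdupq_n_u8(0xF0)));
}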

View File

@@ -0,0 +1,154 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_BCAX_H)
#define SIMDE_ARM_NEON_BCAX_H
#include "types.h"
#include "eor.h"
#include "bic.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vbcaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_u8(a, b, c);
#else
return simde_veorq_u8(a, simde_vbicq_u8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_u8
#define vbcaxq_u8(a, b, c) simde_vbcaxq_u8(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vbcaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_u16(a, b, c);
#else
return simde_veorq_u16(a, simde_vbicq_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_u16
#define vbcaxq_u16(a, b, c) simde_vbcaxq_u16(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vbcaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_u32(a, b, c);
#else
return simde_veorq_u32(a, simde_vbicq_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_u32
#define vbcaxq_u32(a, b, c) simde_vbcaxq_u32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vbcaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_u64(a, b, c);
#else
return simde_veorq_u64(a, simde_vbicq_u64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_u64
#define vbcaxq_u64(a, b, c) simde_vbcaxq_u64(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vbcaxq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_s8(a, b, c);
#else
return simde_veorq_s8(a, simde_vbicq_s8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_s8
#define vbcaxq_s8(a, b, c) simde_vbcaxq_s8(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vbcaxq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_s16(a, b, c);
#else
return simde_veorq_s16(a, simde_vbicq_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_s16
#define vbcaxq_s16(a, b, c) simde_vbcaxq_s16(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vbcaxq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_s32(a, b, c);
#else
return simde_veorq_s32(a, simde_vbicq_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_s32
#define vbcaxq_s32(a, b, c) simde_vbcaxq_s32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vbcaxq_s64(simde_int64x2_t a, simde_int64x2_t b, simde_int64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
return vbcaxq_s64(a, b, c);
#else
return simde_veorq_s64(a, simde_vbicq_s64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
#undef vbcaxq_s64
#define vbcaxq_s64(a, b, c) simde_vbcaxq_s64(a, b, c)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_BCAX_H) */
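/* Editor's note: a scalar cross-check sketch, not part of the vendored
 * sources. vbcaxq ("bit clear and exclusive OR", from the Armv8.2 SHA-3
 * extension) computes a ^ (b & ~c) per lane, exactly as the eor/bic
 * fallback above composes it. Assumes <simde/arm/neon.h> on the include
 * path. */
#include <simde/arm/neon.h>
#include <assert.h>

static void bcax_demo(void) {
  simde_uint32x4_t a = simde_vdupq_n_u32(0xF0F0F0F0u);
  simde_uint32x4_t b = simde_vdupq_n_u32(0x0F0F0F0Fu);
  simde_uint32x4_t c = simde_vdupq_n_u32(0x00FF00FFu);
  uint32_t r = simde_vgetq_lane_u32(simde_vbcaxq_u32(a, b, c), 0);
  assert(r == (0xF0F0F0F0u ^ (0x0F0F0F0Fu & ~0x00FF00FFu)));
  (void) r;
}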

View File

@@ -0,0 +1,504 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_BIC_H)
#define SIMDE_ARM_NEON_BIC_H
#include "dup_n.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
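/* _mm_andnot_si64(x, y) computes (~x) & y, so the operands are swapped
 * here (and in the _mm_andnot_si128 paths below) to yield a & ~b. */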
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_s8
#define vbic_s8(a, b) simde_vbic_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_s16
#define vbic_s16(a, b) simde_vbic_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_s32
#define vbic_s32(a, b) simde_vbic_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_s64
#define vbic_s64(a, b) simde_vbic_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_u8(a, b);
#else
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_u8
#define vbic_u8(a, b) simde_vbic_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_u16(a, b);
#else
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_u16
#define vbic_u16(a, b) simde_vbic_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_u32(a, b);
#else
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_u32
#define vbic_u32(a, b) simde_vbic_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbic_u64(a, b);
#else
simde_uint64x1_private
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b),
r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbic_u64
#define vbic_u64(a, b) simde_vbic_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_s8
#define vbicq_s8(a, b) simde_vbicq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_s16
#define vbicq_s16(a, b) simde_vbicq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_s32
#define vbicq_s32(a, b) simde_vbicq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_andc(a, b);
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_s64
#define vbicq_s64(a, b) simde_vbicq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_u8
#define vbicq_u8(a, b) simde_vbicq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_uint16x8_private
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_u16
#define vbicq_u16(a, b) simde_vbicq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_andc(a, b);
#else
simde_uint32x4_private
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_u32
#define vbicq_u32(a, b) simde_vbicq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbicq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_andc(a, b);
#else
simde_uint64x2_private
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b),
r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] & ~b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbicq_u64
#define vbicq_u64(a, b) simde_vbicq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_BIC_H) */
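/* Editor's note: a small usage sketch, not part of the vendored sources,
 * assuming <simde/arm/neon.h> is on the include path. vbic is NEON's
 * "bit clear", r = a & ~b, convenient for stripping flag bits. */
#include <simde/arm/neon.h>

/* Clear the flags named in 'drop' from each of 8 packed flag bytes. */
static simde_uint8x8_t drop_flags(simde_uint8x8_t flags, uint8_t drop) {
  return simde_vbic_u8(flags, simde_vdup_n_u8(drop));
}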

View File

@@ -0,0 +1,761 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_BSL_H)
#define SIMDE_ARM_NEON_BSL_H
#include "types.h"
#include "reinterpret.h"
#include "and.h"
#include "eor.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vbsl_f16(simde_uint16x4_t a, simde_float16x4_t b, simde_float16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vbsl_f16(a, b, c);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(b)),
c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
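/* XOR-blend identity: c ^ ((b ^ c) & a) == (b & a) | (c & ~a), one
 * operation fewer than the masked-OR form used in the scalar loop. */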
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_f16_u16(simde_uint16x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_f16
#define vbsl_f16(a, b, c) simde_vbsl_f16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vbsl_f32(simde_uint32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_f32(a, b, c);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(b)),
c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_f32_u32(simde_uint32x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_f32
#define vbsl_f32(a, b, c) simde_vbsl_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vbsl_f64(simde_uint64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vbsl_f64(a, b, c);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(b)),
c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_f64_u64(simde_uint64x1_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vbsl_f64
#define vbsl_f64(a, b, c) simde_vbsl_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vbsl_s8(simde_uint8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_s8(a, b, c);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(b)),
c_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_s8_u8(simde_uint8x8_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_s8
#define vbsl_s8(a, b, c) simde_vbsl_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vbsl_s16(simde_uint16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_s16(a, b, c);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(b)),
c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_s16_u16(simde_uint16x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_s16
#define vbsl_s16(a, b, c) simde_vbsl_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vbsl_s32(simde_uint32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_s32(a, b, c);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(b)),
c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_s32_u32(simde_uint32x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_s32
#define vbsl_s32(a, b, c) simde_vbsl_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vbsl_s64(simde_uint64x1_t a, simde_int64x1_t b, simde_int64x1_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_s64(a, b, c);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(b)),
c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpret_s64_u64(simde_uint64x1_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_s64
#define vbsl_s64(a, b, c) simde_vbsl_s64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_u8(a, b, c);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b),
c_ = simde_uint8x8_to_private(c);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_u8
#define vbsl_u8(a, b, c) simde_vbsl_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vbsl_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_u16(a, b, c);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b),
c_ = simde_uint16x4_to_private(c);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_u16
#define vbsl_u16(a, b, c) simde_vbsl_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vbsl_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_u32(a, b, c);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b),
c_ = simde_uint32x2_to_private(c);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_u32
#define vbsl_u32(a, b, c) simde_vbsl_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vbsl_u64(simde_uint64x1_t a, simde_uint64x1_t b, simde_uint64x1_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbsl_u64(a, b, c);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b),
c_ = simde_uint64x1_to_private(c);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbsl_u64
#define vbsl_u64(a, b, c) simde_vbsl_u64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vbslq_f16(simde_uint16x8_t a, simde_float16x8_t b, simde_float16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vbslq_f16(a, b, c);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(b)),
c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_f16_u16(simde_uint16x8_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_f16
#define vbslq_f16(a, b, c) simde_vbslq_f16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vbslq_f32(simde_uint32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_f32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(b)),
c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
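/* Immediate 0xCA is the ternary truth table for a bitwise (a ? b : c),
 * i.e. (b & a) | (c & ~a). */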
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_f32_u32(simde_uint32x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_f32
#define vbslq_f32(a, b, c) simde_vbslq_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vbslq_f64(simde_uint64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vbslq_f64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(b)),
c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_f64_u64(simde_uint64x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vbslq_f64
#define vbslq_f64(a, b, c) simde_vbslq_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vbslq_s8(simde_uint8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_s8(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(b)),
c_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_s8_u8(simde_uint8x16_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_s8
#define vbslq_s8(a, b, c) simde_vbslq_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vbslq_s16(simde_uint16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_s16(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(b)),
c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_s16_u16(simde_uint16x8_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_s16
#define vbslq_s16(a, b, c) simde_vbslq_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vbslq_s32(simde_uint32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_s32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(b)),
c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_s32_u32(simde_uint32x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_s32
#define vbslq_s32(a, b, c) simde_vbslq_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vbslq_s64(simde_uint64x2_t a, simde_int64x2_t b, simde_int64x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_s64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return
simde_vreinterpretq_s64_s32(
simde_vbslq_s32(
simde_vreinterpretq_u32_u64(a),
simde_vreinterpretq_s32_s64(b),
simde_vreinterpretq_s32_s64(c)
)
);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), c),
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b),
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a));
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(b)),
c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(c));
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_vreinterpretq_s64_u64(simde_uint64x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_s64
#define vbslq_s64(a, b, c) simde_vbslq_s64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vbslq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_u8(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b),
c_ = simde_uint8x16_to_private(c);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_u8
#define vbslq_u8(a, b, c) simde_vbslq_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vbslq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_u16(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b),
c_ = simde_uint16x8_to_private(c);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_u16
#define vbslq_u16(a, b, c) simde_vbslq_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vbslq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_u32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_sel(c, b, a);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
c_ = simde_uint32x4_to_private(c);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_u32
#define vbslq_u32(a, b, c) simde_vbslq_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vbslq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vbslq_u64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return
simde_vreinterpretq_u64_u32(
simde_vbslq_u32(
simde_vreinterpretq_u32_u64(a),
simde_vreinterpretq_u32_u64(b),
simde_vreinterpretq_u32_u64(c)
)
);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b),
c_ = simde_uint64x2_to_private(c);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vbslq_u64
#define vbslq_u64(a, b, c) simde_vbslq_u64((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_BSL_H) */
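/* Editor's note: a usage sketch, not part of the vendored sources, assuming
 * <simde/arm/neon.h> is on the include path. vbsl takes bits from b where
 * the mask a is set and from c elsewhere, so feeding it a lane-wise compare
 * gives a branch-free select. */
#include <simde/arm/neon.h>

/* Per-lane maximum via compare + bitwise select (simde_vmaxq_u8 also
 * exists; this merely demonstrates vbslq). */
static simde_uint8x16_t max_u8(simde_uint8x16_t x, simde_uint8x16_t y) {
  simde_uint8x16_t mask = simde_vcgtq_u8(x, y); /* all-ones where x > y */
  return simde_vbslq_u8(mask, x, y);
}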

View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_CAGE_H)
#define SIMDE_ARM_NEON_CAGE_H
#include "types.h"
#include "abs.h"
#include "cge.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
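/* vcage*: absolute compare greater-than-or-equal, all-ones lanes where |a| >= |b|. */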
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcageh_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcageh_f16(a, b);
#else
simde_float32_t a_ = simde_float16_to_float32(a);
simde_float32_t b_ = simde_float16_to_float32(b);
return (simde_math_fabsf(a_) >= simde_math_fabsf(b_)) ? UINT16_MAX : UINT16_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcageh_f16
#define vcageh_f16(a, b) simde_vcageh_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcages_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcages_f32(a, b);
#else
return (simde_math_fabsf(a) >= simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcages_f32
#define vcages_f32(a, b) simde_vcages_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcaged_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcaged_f64(a, b);
#else
return (simde_math_fabs(a) >= simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcaged_f64
#define vcaged_f64(a, b) simde_vcaged_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcage_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcage_f16(a, b);
#else
simde_float16x4_private
a_ = simde_float16x4_to_private(a),
b_ = simde_float16x4_to_private(b);
simde_uint16x4_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcage_f16
#define vcage_f16(a, b) simde_vcage_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcage_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcage_f32(a, b);
#else
return simde_vcge_f32(simde_vabs_f32(a), simde_vabs_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcage_f32
#define vcage_f32(a, b) simde_vcage_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcage_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcage_f64(a, b);
#else
return simde_vcge_f64(simde_vabs_f64(a), simde_vabs_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcage_f64
#define vcage_f64(a, b) simde_vcage_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcageq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcageq_f16(a, b);
#else
simde_float16x8_private
a_ = simde_float16x8_to_private(a),
b_ = simde_float16x8_to_private(b);
simde_uint16x8_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcageq_f16
#define vcageq_f16(a, b) simde_vcageq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcageq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcageq_f32(a, b);
#else
return simde_vcgeq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcageq_f32
#define vcageq_f32(a, b) simde_vcageq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcageq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcageq_f64(a, b);
#else
return simde_vcgeq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcageq_f64
#define vcageq_f64(a, b) simde_vcageq_f64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CAGE_H) */

View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_CAGT_H)
#define SIMDE_ARM_NEON_CAGT_H
#include "types.h"
#include "abs.h"
#include "cgt.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
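/* vcagt*: absolute compare greater-than, all-ones lanes where |a| > |b|. */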
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcagth_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcagth_f16(a, b);
#else
simde_float32_t
af = simde_float16_to_float32(a),
bf = simde_float16_to_float32(b);
return (simde_math_fabsf(af) > simde_math_fabsf(bf)) ? UINT16_MAX : UINT16_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcagth_f16
#define vcagth_f16(a, b) simde_vcagth_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcagts_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcagts_f32(a, b);
#else
return (simde_math_fabsf(a) > simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcagts_f32
#define vcagts_f32(a, b) simde_vcagts_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcagtd_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcagtd_f64(a, b);
#else
return (simde_math_fabs(a) > simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcagtd_f64
#define vcagtd_f64(a, b) simde_vcagtd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcagt_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcagt_f16(a, b);
#else
simde_uint16x4_private r_;
simde_float16x4_private
a_ = simde_float16x4_to_private(a),
b_ = simde_float16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcagt_f16
#define vcagt_f16(a, b) simde_vcagt_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcagt_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcagt_f32(a, b);
#else
return simde_vcgt_f32(simde_vabs_f32(a), simde_vabs_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcagt_f32
#define vcagt_f32(a, b) simde_vcagt_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcagt_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcagt_f64(a, b);
#else
return simde_vcgt_f64(simde_vabs_f64(a), simde_vabs_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcagt_f64
#define vcagt_f64(a, b) simde_vcagt_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcagtq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcagtq_f16(a, b);
#else
simde_uint16x8_private r_;
simde_float16x8_private
a_ = simde_float16x8_to_private(a),
b_ = simde_float16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcagtq_f16
#define vcagtq_f16(a, b) simde_vcagtq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcagtq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcagtq_f32(a, b);
#else
return simde_vcgtq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcagtq_f32
#define vcagtq_f32(a, b) simde_vcagtq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcagtq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcagtq_f64(a, b);
#else
return simde_vcgtq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcagtq_f64
#define vcagtq_f64(a, b) simde_vcagtq_f64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CAGT_H) */

View File

@@ -0,0 +1,772 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_CEQ_H)
#define SIMDE_ARM_NEON_CEQ_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
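/* vceq*: lanewise equality; each result lane is all-ones when a == b, else zero. */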
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vceqh_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceqh_f16(a, b);
#else
return (simde_float16_to_float32(a) == simde_float16_to_float32(b)) ? UINT16_MAX : UINT16_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqh_f16
#define vceqh_f16(a, b) simde_vceqh_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vceqs_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqs_f32(a, b);
#else
return (a == b) ? ~UINT32_C(0) : UINT32_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqs_f32
#define vceqs_f32(a, b) simde_vceqs_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqd_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqd_f64(a, b);
#else
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqd_f64
#define vceqd_f64(a, b) simde_vceqd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqd_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vceqd_s64(a, b));
#else
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqd_s64
#define vceqd_s64(a, b) simde_vceqd_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqd_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqd_u64(a, b);
#else
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqd_u64
#define vceqd_u64(a, b) simde_vceqd_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceq_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceq_f16(a, b);
#else
simde_uint16x4_private r_;
simde_float16x4_private
a_ = simde_float16x4_to_private(a),
b_ = simde_float16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vceq_f16
#define vceq_f16(a, b) simde_vceq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceq_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_f32(a, b);
#else
simde_uint32x2_private r_;
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_f32
#define vceq_f32(a, b) simde_vceq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceq_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceq_f64(a, b);
#else
simde_uint64x1_private r_;
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceq_f64
#define vceq_f64(a, b) simde_vceq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceq_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_s8(a, b);
#else
simde_uint8x8_private r_;
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpeq_pi8(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_s8
#define vceq_s8(a, b) simde_vceq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceq_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_s16(a, b);
#else
simde_uint16x4_private r_;
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpeq_pi16(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_s16
#define vceq_s16(a, b) simde_vceq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceq_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_s32(a, b);
#else
simde_uint32x2_private r_;
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpeq_pi32(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_s32
#define vceq_s32(a, b) simde_vceq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceq_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceq_s64(a, b);
#else
simde_uint64x1_private r_;
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_s64
#define vceq_s64(a, b) simde_vceq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceq_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_u8(a, b);
#else
simde_uint8x8_private r_;
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_u8
#define vceq_u8(a, b) simde_vceq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceq_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_u16(a, b);
#else
simde_uint16x4_private r_;
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_u16
#define vceq_u16(a, b) simde_vceq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceq_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceq_u32(a, b);
#else
simde_uint32x2_private r_;
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_u32
#define vceq_u32(a, b) simde_vceq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceq_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceq_u64(a, b);
#else
simde_uint64x1_private r_;
simde_uint64x1_private
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceq_u64
#define vceq_u64(a, b) simde_vceq_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceqq_f16(a, b);
#else
simde_uint16x8_private r_;
simde_float16x8_private
a_ = simde_float16x8_to_private(a),
b_ = simde_float16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vceqq_f16
#define vceqq_f16(a, b) simde_vceqq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
#else
simde_uint32x4_private r_;
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_cmpeq_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_eq(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_f32
#define vceqq_f32(a, b) simde_vceqq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
#else
simde_uint64x2_private r_;
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castpd_si128(_mm_cmpeq_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_eq(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqq_f64
#define vceqq_f64(a, b) simde_vceqq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b));
#else
simde_uint8x16_private r_;
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_eq(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_s8
#define vceqq_s8(a, b) simde_vceqq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b));
#else
simde_uint16x8_private r_;
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_eq(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_s16
#define vceqq_s16(a, b) simde_vceqq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
#else
simde_uint32x4_private r_;
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_eq(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_s32
#define vceqq_s32(a, b) simde_vceqq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
#else
simde_uint64x2_private r_;
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_s64
#define vceqq_s64(a, b) simde_vceqq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b));
#else
simde_uint8x16_private r_;
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_u8
#define vceqq_u8(a, b) simde_vceqq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b));
#else
simde_uint16x8_private r_;
simde_uint16x8_private
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_u16
#define vceqq_u16(a, b) simde_vceqq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vceqq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
#else
simde_uint32x4_private r_;
simde_uint32x4_private
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_u32
#define vceqq_u32(a, b) simde_vceqq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
#else
simde_uint64x2_private r_;
simde_uint64x2_private
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqq_u64
#define vceqq_u64(a, b) simde_vceqq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CEQ_H) */

View File

@@ -0,0 +1,421 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_CEQZ_H)
#define SIMDE_ARM_NEON_CEQZ_H
#include "ceq.h"
#include "dup_n.h"
#include "types.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
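/* vceqz*: equality against zero, implemented portably as vceq(a, vdup_n(0)). */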
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_f16(simde_float16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceqz_f16(a);
#else
return simde_vceq_f16(a, simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0)));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vceqz_f16
#define vceqz_f16(a) simde_vceqz_f16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_f32(a);
#else
return simde_vceq_f32(a, simde_vdup_n_f32(0.0f));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_f32
#define vceqz_f32(a) simde_vceqz_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_f64(a);
#else
return simde_vceq_f64(a, simde_vdup_n_f64(0.0));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqz_f64
#define vceqz_f64(a) simde_vceqz_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceqz_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_s8(a);
#else
return simde_vceq_s8(a, simde_vdup_n_s8(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_s8
#define vceqz_s8(a) simde_vceqz_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_s16(a);
#else
return simde_vceq_s16(a, simde_vdup_n_s16(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_s16
#define vceqz_s16(a) simde_vceqz_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_s32(a);
#else
return simde_vceq_s32(a, simde_vdup_n_s32(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_s32
#define vceqz_s32(a) simde_vceqz_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_s64(a);
#else
return simde_vceq_s64(a, simde_vdup_n_s64(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_s64
#define vceqz_s64(a) simde_vceqz_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceqz_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_u8(a);
#else
return simde_vceq_u8(a, simde_vdup_n_u8(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_u8
#define vceqz_u8(a) simde_vceqz_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_u16(simde_uint16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_u16(a);
#else
return simde_vceq_u16(a, simde_vdup_n_u16(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_u16
#define vceqz_u16(a) simde_vceqz_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_u32(simde_uint32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_u32(a);
#else
return simde_vceq_u32(a, simde_vdup_n_u32(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_u32
#define vceqz_u32(a) simde_vceqz_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_u64(simde_uint64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqz_u64(a);
#else
return simde_vceq_u64(a, simde_vdup_n_u64(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqz_u64
#define vceqz_u64(a) simde_vceqz_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_f16(simde_float16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceqzq_f16(a);
#else
return simde_vceqq_f16(a, simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0)));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vceqzq_f16
#define vceqzq_f16(a) simde_vceqzq_f16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_f32(a);
#else
return simde_vceqq_f32(a, simde_vdupq_n_f32(0.0f));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_f32
#define vceqzq_f32(a) simde_vceqzq_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_f64(a);
#else
return simde_vceqq_f64(a, simde_vdupq_n_f64(0.0));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqzq_f64
#define vceqzq_f64(a) simde_vceqzq_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqzq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_s8(a);
#else
return simde_vceqq_s8(a, simde_vdupq_n_s8(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_s8
#define vceqzq_s8(a) simde_vceqzq_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_s16(a);
#else
return simde_vceqq_s16(a, simde_vdupq_n_s16(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_s16
#define vceqzq_s16(a) simde_vceqzq_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_s32(a);
#else
return simde_vceqq_s32(a, simde_vdupq_n_s32(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_s32
#define vceqzq_s32(a) simde_vceqzq_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_s64(a);
#else
return simde_vceqq_s64(a, simde_vdupq_n_s64(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_s64
#define vceqzq_s64(a) simde_vceqzq_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqzq_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_u8(a);
#else
return simde_vceqq_u8(a, simde_vdupq_n_u8(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_u8
#define vceqzq_u8(a) simde_vceqzq_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_u16(a);
#else
return simde_vceqq_u16(a, simde_vdupq_n_u16(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_u16
#define vceqzq_u16(a) simde_vceqzq_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_u32(a);
#else
return simde_vceqq_u32(a, simde_vdupq_n_u32(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_u32
#define vceqzq_u32(a) simde_vceqzq_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_u64(simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzq_u64(a);
#else
return simde_vceqq_u64(a, simde_vdupq_n_u64(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzq_u64
#define vceqzq_u64(a) simde_vceqzq_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vceqzd_s64(a));
#else
return simde_vceqd_s64(a, INT64_C(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzd_s64
#define vceqzd_s64(a) simde_vceqzd_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_u64(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzd_u64(a);
#else
return simde_vceqd_u64(a, UINT64_C(0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzd_u64
#define vceqzd_u64(a) simde_vceqzd_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vceqzh_f16(simde_float16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vceqzh_f16(a);
#else
return simde_vceqh_f16(a, SIMDE_FLOAT16_VALUE(0.0));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vceqzh_f16
#define vceqzh_f16(a) simde_vceqzh_f16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vceqzs_f32(simde_float32_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzs_f32(a);
#else
return simde_vceqs_f32(a, SIMDE_FLOAT32_C(0.0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzs_f32
#define vceqzs_f32(a) simde_vceqzs_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_f64(simde_float64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vceqzd_f64(a);
#else
return simde_vceqd_f64(a, SIMDE_FLOAT64_C(0.0));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vceqzd_f64
#define vceqzd_f64(a) simde_vceqzd_f64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CEQZ_H) */

View File

@@ -0,0 +1,816 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CGE_H)
#define SIMDE_ARM_NEON_CGE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
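/* vcge*: lanewise a >= b, all-ones on true. Illustrative use only (assumes
 * SIMDE_ENABLE_NATIVE_ALIASES so the un-prefixed NEON names resolve):
 *   uint32x4_t m = vcgeq_f32(a, b);     // all-ones lanes where a >= b
 *   float32x4_t r = vbslq_f32(m, x, y); // then select x where the mask is set
 */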
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcgeh_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(uint16_t, vcgeh_f16(a, b));
#else
return (simde_float16_to_float32(a) >= simde_float16_to_float32(b)) ? UINT16_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgeh_f16
#define vcgeh_f16(a, b) simde_vcgeh_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcgeq_f16(a, b);
#else
simde_float16x8_private
a_ = simde_float16x8_to_private(a),
b_ = simde_float16x8_to_private(b);
simde_uint16x8_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcgeq_f16
#define vcgeq_f16(a, b) simde_vcgeq_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
#else
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_cmpge_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_f32
#define vcgeq_f32(a, b) simde_vcgeq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgeq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
#else
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgeq_f64
#define vcgeq_f64(a, b) simde_vcgeq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
simde_uint8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(a_.m128i, b_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_s8
#define vcgeq_s8(a, b) simde_vcgeq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
simde_uint16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(a_.m128i, b_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_s16
#define vcgeq_s16(a, b) simde_vcgeq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, b_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_s32
#define vcgeq_s32(a, b) simde_vcgeq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgeq_s64(a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
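/* vqsubq_s64 keeps the sign of a - b even on overflow; shifting by 63 smears it across the lane, and mvn then yields all-ones exactly where a >= b. */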
return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(a, b), 63))));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(a_.m128i, b_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgeq_s64
#define vcgeq_s64(a, b) simde_vcgeq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
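/* Unsigned a >= b holds exactly when min(a, b) == b, which needs only unsigned min plus equality. */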
r_.m128i =
_mm_cmpeq_epi8(
_mm_min_epu8(b_.m128i, a_.m128i),
b_.m128i
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_u8
#define vcgeq_u8(a, b) simde_vcgeq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i =
_mm_cmpeq_epi16(
_mm_min_epu16(b_.m128i, a_.m128i),
b_.m128i
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
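/* SSE2 has only signed compares; flipping the sign bit of both operands maps unsigned order onto signed order. */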
__m128i sign_bits = _mm_set1_epi16(INT16_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_u16
#define vcgeq_u16(a, b) simde_vcgeq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgeq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i =
_mm_cmpeq_epi32(
_mm_min_epu32(b_.m128i, a_.m128i),
b_.m128i
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i sign_bits = _mm_set1_epi32(INT32_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgeq_u32
#define vcgeq_u32(a, b) simde_vcgeq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgeq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i =
_mm_cmpeq_epi64(
_mm_min_epu64(b_.m128i, a_.m128i),
b_.m128i
);
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
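    /* Same sign-bit bias as the narrower lanes: SSE4.2 adds a signed
     * 64-bit compare (_mm_cmpgt_epi64) but still no unsigned one. */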
__m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgeq_u64
#define vcgeq_u64(a, b) simde_vcgeq_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcge_f16(a, b);
#else
simde_float16x4_private
a_ = simde_float16x4_to_private(a),
b_ = simde_float16x4_to_private(b);
simde_uint16x4_private r_;
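  /* No portable vector path for half floats here; each lane falls back
   * to the scalar simde_vcgeh_f16 helper. */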
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]);
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcge_f16
#define vcge_f16(a, b) simde_vcge_f16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_f32(a, b);
#else
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_f32
#define vcge_f32(a, b) simde_vcge_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcge_f64(a, b);
#else
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcge_f64
#define vcge_f64(a, b) simde_vcge_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
simde_uint8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
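    /* MMX provides greater-than and equality compares but no >=, so OR
     * the two results together. */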
r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(a_.m64, b_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_s8
#define vcge_s8(a, b) simde_vcge_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
simde_uint16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(a_.m64, b_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_s16
#define vcge_s16(a, b) simde_vcge_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(a_.m64, b_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_s32
#define vcge_s32(a, b) simde_vcge_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcge_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcge_s64
#define vcge_s64(a, b) simde_vcge_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi8(INT8_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_u8
#define vcge_u8(a, b) simde_vcge_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi16(INT16_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_u16
#define vcge_u16(a, b) simde_vcge_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcge_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi32(INT32_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcge_u32
#define vcge_u32(a, b) simde_vcge_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcge_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcge_u64
#define vcge_u64(a, b) simde_vcge_u64((a), (b))
#endif
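/* Scalar variants. Like the NEON intrinsics they wrap, these return an
 * all-bits-set mask (UINT64_MAX / UINT32_MAX) for true rather than 1. */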
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcged_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcged_f64(a, b));
#else
return (a >= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcged_f64
#define vcged_f64(a, b) simde_vcged_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcged_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcged_s64(a, b));
#else
return (a >= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcged_s64
#define vcged_s64(a, b) simde_vcged_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcged_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcged_u64(a, b));
#else
return (a >= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcged_u64
#define vcged_u64(a, b) simde_vcged_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcges_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcges_f32(a, b));
#else
return (a >= b) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcges_f32
#define vcges_f32(a, b) simde_vcges_f32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CGE_H) */


@@ -0,0 +1,420 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CGEZ_H)
#define SIMDE_ARM_NEON_CGEZ_H
#include "cge.h"
#include "dup_n.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgezd_f64(simde_float64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgezd_f64(a));
#else
return (a >= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezd_f64
#define vcgezd_f64(a) simde_vcgezd_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgezd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgezd_s64(a));
#else
return (a >= 0) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezd_s64
#define vcgezd_s64(a) simde_vcgezd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcgezs_f32(simde_float32_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcgezs_f32(a));
#else
return (a >= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezs_f32
#define vcgezs_f32(a) simde_vcgezs_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgezq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
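    /* With any native vector support, compare-against-zero is just the
     * two-operand compare against a splatted zero. */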
return simde_vcgeq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezs_f32(a_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_f32
#define vcgezq_f32(a) simde_vcgezq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgezq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgeq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezd_f64(a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_f64
#define vcgezq_f64(a) simde_vcgezq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgezq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgeq_s8(a, simde_vdupq_n_s8(0));
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
simde_uint8x16_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_s8
#define vcgezq_s8(a) simde_vcgezq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgezq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgeq_s16(a, simde_vdupq_n_s16(0));
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
simde_uint16x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_s16
#define vcgezq_s16(a) simde_vcgezq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgezq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgeq_s32(a, simde_vdupq_n_s32(0));
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_s32
#define vcgezq_s32(a) simde_vcgezq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgezq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgezq_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgeq_s64(a, simde_vdupq_n_s64(0));
#else
simde_int64x2_private a_ = simde_int64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezd_s64(a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgezq_s64
#define vcgezq_s64(a) simde_vcgezq_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgez_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezs_f32(a_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_f32
#define vcgez_f32(a) simde_vcgez_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgez_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x1_private a_ = simde_float64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezd_f64(a_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_f64
#define vcgez_f64(a) simde_vcgez_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgez_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_s8(a, simde_vdup_n_s8(0));
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
simde_uint8x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_s8
#define vcgez_s8(a) simde_vcgez_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgez_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_s16(a, simde_vdup_n_s16(0));
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
simde_uint16x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_s16
#define vcgez_s16(a) simde_vcgez_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgez_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_s32(a, simde_vdup_n_s32(0));
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_s32
#define vcgez_s32(a) simde_vcgez_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgez_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgez_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcge_s64(a, simde_vdup_n_s64(0));
#else
simde_int64x1_private a_ = simde_int64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgezd_s64(a_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgez_s64
#define vcgez_s64(a) simde_vcgez_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CGEZ_H) */


@@ -0,0 +1,743 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CGT_H)
#define SIMDE_ARM_NEON_CGT_H
#include "combine.h"
#include "get_low.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtd_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_f64(a, b));
#else
return (a > b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtd_f64
#define vcgtd_f64(a, b) simde_vcgtd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtd_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_s64(a, b));
#else
return (a > b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtd_s64
#define vcgtd_s64(a, b) simde_vcgtd_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtd_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_u64(a, b));
#else
return (a > b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtd_u64
#define vcgtd_u64(a, b) simde_vcgtd_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcgts_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcgts_f32(a, b));
#else
return (a > b) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgts_f32
#define vcgts_f32(a, b) simde_vcgts_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
#else
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_cmpgt_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_f32
#define vcgtq_f32(a, b) simde_vcgtq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
#else
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castpd_si128(_mm_cmpgt_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtq_f64
#define vcgtq_f64(a, b) simde_vcgtq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b));
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
simde_uint8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpgt_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_s8
#define vcgtq_s8(a, b) simde_vcgtq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b));
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
simde_uint16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpgt_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_s16
#define vcgtq_s16(a, b) simde_vcgtq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmpgt_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_s32
#define vcgtq_s32(a, b) simde_vcgtq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtq_s64(a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
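    /* ARMv7 NEON lacks 64-bit compares: vqsubq_s64(b, a) is negative
     * exactly when a > b (saturation keeps the sign correct even on
     * overflow), and the arithmetic shift by 63 smears that sign bit
     * into a full-lane mask. */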
return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(b, a), 63));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_cmpgt_epi64(a_.m128i, b_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* https://stackoverflow.com/a/65175746/501126 */
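      /* If the high dwords differ, the signed 32-bit compare on them
       * decides; if they are equal, the high dword of the 64-bit
       * difference b - a is all-ones exactly when a > b. The final
       * shuffle replicates each lane's high-dword verdict across the
       * whole 64-bit lane. */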
__m128i r = _mm_and_si128(_mm_cmpeq_epi32(a_.m128i, b_.m128i), _mm_sub_epi64(b_.m128i, a_.m128i));
r = _mm_or_si128(r, _mm_cmpgt_epi32(a_.m128i, b_.m128i));
r_.m128i = _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtq_s64
#define vcgtq_s64(a, b) simde_vcgtq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b));
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
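    /* Unsigned a > b via saturating arithmetic: tmp = max(a - b, 0) is
     * non-zero exactly when a > b, and adding its (wrapped) negation
     * with unsigned saturation turns any non-zero tmp into 0xFF while
     * leaving 0 untouched. */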
__m128i tmp = _mm_subs_epu8(a_.m128i, b_.m128i);
r_.m128i = _mm_adds_epu8(tmp, _mm_sub_epi8(_mm_setzero_si128(), tmp));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_u8
#define vcgtq_u8(a, b) simde_vcgtq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b));
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
__m128i tmp = _mm_subs_epu16(a_.m128i, b_.m128i);
r_.m128i = _mm_adds_epu16(tmp, _mm_sub_epi16(_mm_setzero_si128(), tmp));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_u16
#define vcgtq_u16(a, b) simde_vcgtq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgtq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
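    /* A signed compare is wrong exactly when the sign bits of a and b
     * differ; _mm_srai_epi32 smears that sign difference into a full
     * mask and the outer XOR flips those lanes back. */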
r_.m128i =
_mm_xor_si128(
_mm_cmpgt_epi32(a_.m128i, b_.m128i),
_mm_srai_epi32(_mm_xor_si128(a_.m128i, b_.m128i), 31)
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_gt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgtq_u32
#define vcgtq_u32(a, b) simde_vcgtq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_SSE4_2_NATIVE)
__m128i sign_bit = _mm_set1_epi64x(INT64_MIN);
r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bit), _mm_xor_si128(b_.m128i, sign_bit));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtq_u64
#define vcgtq_u64(a, b) simde_vcgtq_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_f32(a, b);
#else
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_f32
#define vcgt_f32(a, b) simde_vcgt_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgt_f64(a, b);
#else
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgt_f64
#define vcgt_f64(a, b) simde_vcgt_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgt_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
simde_uint8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi8(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_s8
#define vcgt_s8(a, b) simde_vcgt_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgt_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
simde_uint16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi16(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_s16
#define vcgt_s16(a, b) simde_vcgt_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi32(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_s32
#define vcgt_s32(a, b) simde_vcgt_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgt_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgt_s64
#define vcgt_s64(a, b) simde_vcgt_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgt_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bit = _mm_set1_pi8(INT8_MIN);
r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_u8
#define vcgt_u8(a, b) simde_vcgt_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgt_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bit = _mm_set1_pi16(INT16_MIN);
r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_u16
#define vcgt_u16(a, b) simde_vcgt_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcgt_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bit = _mm_set1_pi32(INT32_MIN);
r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcgt_u32
#define vcgt_u32(a, b) simde_vcgt_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgt_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgt_u64
#define vcgt_u64(a, b) simde_vcgt_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CGT_H) */


@@ -0,0 +1,422 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CGTZ_H)
#define SIMDE_ARM_NEON_CGTZ_H
#include "cgt.h"
#include "combine.h"
#include "dup_n.h"
#include "get_low.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
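/* Compare-greater-than-zero family: the vector forms delegate to the
 * two-operand compares from cgt.h against a zero splat whenever a
 * natural vector size is available, otherwise they fall back lane by
 * lane on the scalar helpers that open this file. */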
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtzd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_s64(a));
#else
return (a > 0) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzd_s64
#define vcgtzd_s64(a) simde_vcgtzd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtzd_f64(simde_float64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_f64(a));
#else
return (a > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzd_f64
#define vcgtzd_f64(a) simde_vcgtzd_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcgtzs_f32(simde_float32_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcgtzs_f32(a));
#else
return (a > SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzs_f32
#define vcgtzs_f32(a) simde_vcgtzs_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtzq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_f32
#define vcgtzq_f32(a) simde_vcgtzq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtzq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_f64
#define vcgtzq_f64(a) simde_vcgtzq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtzq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_s8(a, simde_vdupq_n_s8(0));
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
simde_uint8x16_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_s8
#define vcgtzq_s8(a) simde_vcgtzq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtzq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_s16(a, simde_vdupq_n_s16(0));
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
simde_uint16x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_s16
#define vcgtzq_s16(a) simde_vcgtzq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtzq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_s32(a, simde_vdupq_n_s32(0));
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_s32
#define vcgtzq_s32(a) simde_vcgtzq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtzq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtzq_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgtq_s64(a, simde_vdupq_n_s64(0));
#else
simde_int64x2_private a_ = simde_int64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzq_s64
#define vcgtzq_s64(a) simde_vcgtzq_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgtz_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_f32
#define vcgtz_f32(a) simde_vcgtz_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgtz_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x1_private a_ = simde_float64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_f64
#define vcgtz_f64(a) simde_vcgtz_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgtz_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_s8(a, simde_vdup_n_s8(0));
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
simde_uint8x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_s8
#define vcgtz_s8(a) simde_vcgtz_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgtz_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_s16(a, simde_vdup_n_s16(0));
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
simde_uint16x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_s16
#define vcgtz_s16(a) simde_vcgtz_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgtz_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_s32(a, simde_vdup_n_s32(0));
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_s32
#define vcgtz_s32(a) simde_vcgtz_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgtz_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcgtz_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcgt_s64(a, simde_vdup_n_s64(0));
#else
simde_int64x1_private a_ = simde_int64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtz_s64
#define vcgtz_s64(a) simde_vcgtz_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CGTZ_H) */



@@ -0,0 +1,776 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CLE_H)
#define SIMDE_ARM_NEON_CLE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcled_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcled_f64(a, b));
#else
return (a <= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcled_f64
#define vcled_f64(a, b) simde_vcled_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcled_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcled_s64(a, b));
#else
return (a <= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcled_s64
#define vcled_s64(a, b) simde_vcled_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcled_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcled_u64(a, b));
#else
return (a <= b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcled_u64
#define vcled_u64(a, b) simde_vcled_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcles_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcles_f32(a, b));
#else
return (a <= b) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcles_f32
#define vcles_f32(a, b) simde_vcles_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
#else
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_cmple_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_f32
#define vcleq_f32(a, b) simde_vcleq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcleq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
#else
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castpd_si128(_mm_cmple_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcleq_f64
#define vcleq_f64(a, b) simde_vcleq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcleq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b));
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
simde_uint8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(b_.m128i, a_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_s8
#define vcleq_s8(a, b) simde_vcleq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcleq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b));
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
simde_uint16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(b_.m128i, a_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_s16
#define vcleq_s16(a, b) simde_vcleq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(b_.m128i, a_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_s32
#define vcleq_s32(a, b) simde_vcleq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcleq_s64(a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
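/* ARMv7 NEON has no 64-bit compare: the saturating b - a has its
 * sign bit set exactly when b < a (saturation prevents wraparound),
 * the arithmetic shift by 63 smears that bit across each lane, and
 * the inversion yields all-ones where a <= b. */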
return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(b, a), 63))));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(b_.m128i, a_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcleq_s64
#define vcleq_s64(a, b) simde_vcleq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcleq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b));
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
/* Unsigned a <= b is equivalent to min(a, b) == a; see
 * http://www.alfredklomp.com/programming/sse-intrinsics/ */
r_.m128i =
_mm_cmpeq_epi8(
_mm_min_epu8(a_.m128i, b_.m128i),
a_.m128i
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_u8
#define vcleq_u8(a, b) simde_vcleq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcleq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b));
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i =
_mm_cmpeq_epi16(
_mm_min_epu16(a_.m128i, b_.m128i),
a_.m128i
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
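/* SSE2 has no unsigned 16-bit compare: XORing the sign bit into both
 * operands biases them so the signed compare yields the unsigned
 * ordering. The same trick is reused for the wider types below. */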
__m128i sign_bits = _mm_set1_epi16(INT16_MIN);
r_.m128i =
_mm_or_si128(
_mm_cmpgt_epi16(
_mm_xor_si128(b_.m128i, sign_bits),
_mm_xor_si128(a_.m128i, sign_bits)
),
_mm_cmpeq_epi16(a_.m128i, b_.m128i)
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_u16
#define vcleq_u16(a, b) simde_vcleq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcleq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i =
_mm_cmpeq_epi32(
_mm_min_epu32(a_.m128i, b_.m128i),
a_.m128i
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i sign_bits = _mm_set1_epi32(INT32_MIN);
r_.m128i =
_mm_or_si128(
_mm_cmpgt_epi32(
_mm_xor_si128(b_.m128i, sign_bits),
_mm_xor_si128(a_.m128i, sign_bits)
),
_mm_cmpeq_epi32(a_.m128i, b_.m128i)
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_le(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcleq_u32
#define vcleq_u32(a, b) simde_vcleq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcleq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
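/* AVX-512VL adds a native unsigned 64-bit min, so the
 * min(a, b) == a identity used for the narrower types applies here
 * as well. */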
r_.m128i =
_mm_cmpeq_epi64(
_mm_min_epu64(a_.m128i, b_.m128i),
a_.m128i
);
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
__m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
r_.m128i =
_mm_or_si128(
_mm_cmpgt_epi64(
_mm_xor_si128(b_.m128i, sign_bits),
_mm_xor_si128(a_.m128i, sign_bits)
),
_mm_cmpeq_epi64(a_.m128i, b_.m128i)
);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcleq_u64
#define vcleq_u64(a, b) simde_vcleq_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcle_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_f32(a, b);
#else
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_f32
#define vcle_f32(a, b) simde_vcle_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcle_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcle_f64(a, b);
#else
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcle_f64
#define vcle_f64(a, b) simde_vcle_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcle_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
simde_uint8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(b_.m64, a_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_s8
#define vcle_s8(a, b) simde_vcle_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcle_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
simde_uint16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(b_.m64, a_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_s16
#define vcle_s16(a, b) simde_vcle_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcle_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(b_.m64, a_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_s32
#define vcle_s32(a, b) simde_vcle_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcle_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcle_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcle_s64
#define vcle_s64(a, b) simde_vcle_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcle_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi8(INT8_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_u8
#define vcle_u8(a, b) simde_vcle_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcle_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi16(INT16_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_u16
#define vcle_u16(a, b) simde_vcle_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcle_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcle_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi32(INT32_MIN);
r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcle_u32
#define vcle_u32(a, b) simde_vcle_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcle_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcle_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcle_u64
#define vcle_u64(a, b) simde_vcle_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLE_H) */


@@ -0,0 +1,420 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CLEZ_H)
#define SIMDE_ARM_NEON_CLEZ_H
#include "cle.h"
#include "dup_n.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vclezd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vclezd_s64(a));
#else
return (a <= 0) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezd_s64
#define vclezd_s64(a) simde_vclezd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vclezd_f64(simde_float64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vclezd_f64(a));
#else
return (a <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezd_f64
#define vclezd_f64(a) simde_vclezd_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vclezs_f32(simde_float32_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vclezs_f32(a));
#else
return (a <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezs_f32
#define vclezs_f32(a) simde_vclezs_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vclezq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_f32
#define vclezq_f32(a) simde_vclezq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vclezq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_f64
#define vclezq_f64(a) simde_vclezq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vclezq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_s8(a, simde_vdupq_n_s8(0));
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
simde_uint8x16_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_s8
#define vclezq_s8(a) simde_vclezq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vclezq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_s16(a, simde_vdupq_n_s16(0));
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
simde_uint16x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_s16
#define vclezq_s16(a) simde_vclezq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vclezq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_s32(a, simde_vdupq_n_s32(0));
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_s32
#define vclezq_s32(a) simde_vclezq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vclezq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclezq_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcleq_s64(a, simde_vdupq_n_s64(0));
#else
simde_int64x2_private a_ = simde_int64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclezq_s64
#define vclezq_s64(a) simde_vclezq_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclez_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_f32
#define vclez_f32(a) simde_vclez_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclez_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x1_private a_ = simde_float64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_f64
#define vclez_f64(a) simde_vclez_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vclez_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_s8(a, simde_vdup_n_s8(0));
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
simde_uint8x8_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_s8
#define vclez_s8(a) simde_vclez_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vclez_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_s16(a, simde_vdup_n_s16(0));
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
simde_uint16x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_s16
#define vclez_s16(a) simde_vclez_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclez_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_s32(a, simde_vdup_n_s32(0));
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_s32
#define vclez_s32(a) simde_vclez_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclez_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclez_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcle_s64(a, simde_vdup_n_s64(0));
#else
simde_int64x1_private a_ = simde_int64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] <= 0) ? UINT64_MAX : 0;
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclez_s64
#define vclez_s64(a) simde_vclez_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLEZ_H) */


@@ -0,0 +1,148 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_CLS_H)
#define SIMDE_ARM_NEON_CLS_H
#include "types.h"
#include "bsl.h"
#include "clz.h"
#include "cltz.h"
#include "dup_n.h"
#include "mvn.h"
#include "sub.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vcls_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcls_s8(a);
#else
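/* Count of leading sign bits: for a >= 0 it is clz(a) - 1, and for
 * a < 0 it is clz(~a) - 1, so select ~a in the negative lanes, count
 * leading zeros, and subtract one. */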
return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1)));
#endif
}
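/* The unsigned variants operate on the same bit pattern, so they are
 * thin wrappers that reinterpret to signed first. */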
#define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcls_s8
#define vcls_s8(a) simde_vcls_s8(a)
#undef vcls_u8
#define vcls_u8(a) simde_vcls_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vcls_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcls_s16(a);
#else
return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1)));
#endif
}
#define simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcls_s16
#define vcls_s16(a) simde_vcls_s16(a)
#undef vcls_u16
#define vcls_u16(a) simde_vcls_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vcls_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcls_s32(a);
#else
return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1)));
#endif
}
#define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcls_s32
#define vcls_s32(a) simde_vcls_s32(a)
#undef vcls_u32
#define vcls_u32(a) simde_vcls_u32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vclsq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclsq_s8(a);
#else
return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1)));
#endif
}
#define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclsq_s8
#define vclsq_s8(a) simde_vclsq_s8(a)
#undef vclsq_u8
#define vclsq_u8(a) simde_vclsq_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vclsq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclsq_s16(a);
#else
return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1)));
#endif
}
#define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclsq_s16
#define vclsq_s16(a) simde_vclsq_s16(a)
#undef vclsq_u16
#define vclsq_u16(a) simde_vclsq_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vclsq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclsq_s32(a);
#else
return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1)));
#endif
}
#define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a))
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclsq_s32
#define vclsq_s32(a) simde_vclsq_s32(a)
#undef vclsq_u32
#define vclsq_u32(a) simde_vclsq_u32(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLS_H) */


@@ -0,0 +1,751 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_CLT_H)
#define SIMDE_ARM_NEON_CLT_H
#include "combine.h"
#include "get_low.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcltd_f64(simde_float64_t a, simde_float64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcltd_f64(a, b));
#else
return (a < b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltd_f64
#define vcltd_f64(a, b) simde_vcltd_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcltd_s64(int64_t a, int64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcltd_s64(a, b));
#else
return (a < b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltd_s64
#define vcltd_s64(a, b) simde_vcltd_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcltd_u64(uint64_t a, uint64_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcltd_u64(a, b));
#else
return (a < b) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltd_u64
#define vcltd_u64(a, b) simde_vcltd_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vclts_f32(simde_float32_t a, simde_float32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vclts_f32(a, b));
#else
return (a < b) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclts_f32
#define vclts_f32(a, b) simde_vclts_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcltq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
#else
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_cmplt_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_f32
#define vcltq_f32(a, b) simde_vcltq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcltq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
#else
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castpd_si128(_mm_cmplt_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltq_f64
#define vcltq_f64(a, b) simde_vcltq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcltq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b));
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
simde_uint8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmplt_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_s8
#define vcltq_s8(a, b) simde_vcltq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcltq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b));
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
simde_uint16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmplt_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_s16
#define vcltq_s16(a, b) simde_vcltq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcltq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_cmplt_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_s32
#define vcltq_s32(a, b) simde_vcltq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcltq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltq_s64(a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
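/* ARMv7 NEON lacks a 64-bit compare: the saturating a - b has its
 * sign bit set exactly when a < b, and the arithmetic shift by 63
 * broadcasts that bit across each lane. */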
return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(a, b), 63));
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE4_2_NATIVE)
r_.m128i = _mm_cmpgt_epi64(b_.m128i, a_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltq_s64
#define vcltq_s64(a, b) simde_vcltq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcltq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b));
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
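/* Unsigned a < b is equivalent to (max(a, b) == b) together with
 * a != b, built here as andnot(eq(a, b), eq(max(a, b), b)). */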
r_.m128i = _mm_andnot_si128(
_mm_cmpeq_epi8(b_.m128i, a_.m128i),
_mm_cmpeq_epi8(_mm_max_epu8(b_.m128i, a_.m128i), b_.m128i)
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_u8
#define vcltq_u8(a, b) simde_vcltq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcltq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b));
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_andnot_si128(
_mm_cmpeq_epi16(b_.m128i, a_.m128i),
_mm_cmpeq_epi16(_mm_max_epu16(b_.m128i, a_.m128i), b_.m128i)
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
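/* Same sign-bit bias as in cle.h: flipping the top bit maps the
 * unsigned ordering onto the signed compare that SSE2 provides. */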
__m128i sign_bits = _mm_set1_epi16(INT16_MIN);
r_.m128i = _mm_cmplt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_u16
#define vcltq_u16(a, b) simde_vcltq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcltq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcltq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_andnot_si128(
_mm_cmpeq_epi32(b_.m128i, a_.m128i),
_mm_cmpeq_epi32(_mm_max_epu32(b_.m128i, a_.m128i), b_.m128i)
);
#elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i sign_bits = _mm_set1_epi32(INT32_MIN);
r_.m128i = _mm_cmplt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_lt(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltq_u32
#define vcltq_u32(a, b) simde_vcltq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcltq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_andnot_si128(
_mm_cmpeq_epi64(b_.m128i, a_.m128i),
_mm_cmpeq_epi64(_mm_max_epu64(b_.m128i, a_.m128i), b_.m128i)
);
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
__m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(b_.m128i, sign_bits), _mm_xor_si128(a_.m128i, sign_bits));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltq_u64
#define vcltq_u64(a, b) simde_vcltq_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclt_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_f32(a, b);
#else
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_f32
#define vclt_f32(a, b) simde_vclt_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclt_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclt_f64(a, b);
#else
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclt_f64
#define vclt_f64(a, b) simde_vclt_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vclt_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
simde_uint8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi8(b_.m64, a_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_s8
#define vclt_s8(a, b) simde_vclt_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vclt_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
simde_uint16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi16(b_.m64, a_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_s16
#define vclt_s16(a, b) simde_vclt_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclt_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
simde_uint32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_cmpgt_pi32(b_.m64, a_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_s32
#define vclt_s32(a, b) simde_vclt_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclt_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclt_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclt_s64
#define vclt_s64(a, b) simde_vclt_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vclt_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi8(INT8_MIN);
r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_u8
#define vclt_u8(a, b) simde_vclt_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vclt_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi16(INT16_MIN);
r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_u16
#define vclt_u16(a, b) simde_vclt_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclt_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclt_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
__m64 sign_bits = _mm_set1_pi32(INT32_MIN);
r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclt_u32
#define vclt_u32(a, b) simde_vclt_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclt_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vclt_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vclt_u64
#define vclt_u64(a, b) simde_vclt_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLT_H) */

View File

@@ -0,0 +1,327 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
/* TODO: float fallbacks should use vclt(a, vdup_n(0.0)) */
#if !defined(SIMDE_ARM_NEON_CLTZ_H)
#define SIMDE_ARM_NEON_CLTZ_H
#include "types.h"
#include "shr_n.h"
#include "reinterpret.h"
#include "clt.h"
#include "dup_n.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcltzd_s64(int64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcltzd_s64(a));
#else
return (a < 0) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltzd_s64
#define vcltzd_s64(a) simde_vcltzd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcltzd_f64(simde_float64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint64_t, vcltzd_f64(a));
#else
return (a < SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltzd_f64
#define vcltzd_f64(a) simde_vcltzd_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcltzs_f32(simde_float32_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return HEDLEY_STATIC_CAST(uint32_t, vcltzs_f32(a));
#else
return (a < SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltzs_f32
#define vcltzs_f32(a) simde_vcltzs_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcltz_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
simde_uint32x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltz_f32
#define vcltz_f32(a) simde_vcltz_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcltz_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x1_private a_ = simde_float64x1_to_private(a);
simde_uint64x1_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltz_f64
#define vcltz_f64(a) simde_vcltz_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcltz_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_s8(a, simde_vdup_n_s8(0));
#else
return simde_vreinterpret_u8_s8(simde_vshr_n_s8(a, 7));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltz_s8
#define vcltz_s8(a) simde_vcltz_s8(a)
#endif
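/* The vshr_n-based fallbacks in the vcltz_s* functions here use an
 * arithmetic shift by (lane width - 1) to smear the sign bit across the
 * lane: a negative lane becomes all ones (the NEON "true" mask), a
 * non-negative lane becomes zero, which is exactly what vcltz returns.
 * Scalar sketch, assuming the usual arithmetic-shift behavior for signed
 * right shifts (illustrative only):
 *
 *   int8_t  a = -5;                   // 0xFB
 *   uint8_t m = (uint8_t) (a >> 7);   // 0xFF, i.e. the "a < 0" mask
 */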
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcltz_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_s16(a, simde_vdup_n_s16(0));
#else
return simde_vreinterpret_u16_s16(simde_vshr_n_s16(a, 15));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltz_s16
#define vcltz_s16(a) simde_vcltz_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcltz_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_s32(a, simde_vdup_n_s32(0));
#else
return simde_vreinterpret_u32_s32(simde_vshr_n_s32(a, 31));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltz_s32
#define vcltz_s32(a) simde_vcltz_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcltz_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltz_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vclt_s64(a, simde_vdup_n_s64(0));
#else
return simde_vreinterpret_u64_s64(simde_vshr_n_s64(a, 63));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltz_s64
#define vcltz_s64(a) simde_vcltz_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcltzq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_f32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
simde_uint32x4_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltzq_f32
#define vcltzq_f32(a) simde_vcltzq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcltzq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_f64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
simde_uint64x2_private r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcltzq_f64
#define vcltzq_f64(a) simde_vcltzq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcltzq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_s8(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_s8(a, simde_vdupq_n_s8(0));
#else
return simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(a, 7));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltzq_s8
#define vcltzq_s8(a) simde_vcltzq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcltzq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_s16(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_s16(a, simde_vdupq_n_s16(0));
#else
return simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(a, 15));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltzq_s16
#define vcltzq_s16(a) simde_vcltzq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcltzq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_s32(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_s32(a, simde_vdupq_n_s32(0));
#else
return simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(a, 31));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltzq_s32
#define vcltzq_s32(a) simde_vcltzq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcltzq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcltzq_s64(a);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vcltq_s64(a, simde_vdupq_n_s64(0));
#else
return simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(a, 63));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcltzq_s64
#define vcltzq_s64(a) simde_vcltzq_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLTZ_H) */

View File

@@ -0,0 +1,427 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_CLZ_H)
#define SIMDE_ARM_NEON_CLZ_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_x_vclzb_u8(uint8_t a) {
#if \
defined(SIMDE_BUILTIN_SUFFIX_8_) && \
( \
SIMDE_BUILTIN_HAS_8_(clz) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
)
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(a);
return HEDLEY_STATIC_CAST(uint8_t, SIMDE_BUILTIN_8_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_8_, a)));
#else
uint8_t r;
uint8_t shift;
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(r);
r = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x0F)) << 2); a >>= r;
shift = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x03)) << 1); a >>= shift; r |= shift;
r |= (a >> 1);
return ((8 * sizeof(r)) - 1) - r;
#endif
}
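/* The branchless fallback above computes floor(log2(a)) into r by testing
 * successively narrower halves of the value, then returns
 * (bit width - 1) - floor(log2(a)).  Worked example for a = 0x13
 * (0b00010011, illustrative only):
 *
 *   a > 0x0F -> r = 4, a >>= 4     (a becomes 0x01)
 *   a > 0x03 -> shift = 0, r |= 0  (r stays 4)
 *   r |= a >> 1                    (still 4, so floor(log2(a)) = 4)
 *   return 7 - 4 = 3               (three leading zeros)
 */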
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_x_vclzh_u16(uint16_t a) {
#if \
defined(SIMDE_BUILTIN_SUFFIX_16_) && \
( \
SIMDE_BUILTIN_HAS_16_(clz) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
)
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(a);
return HEDLEY_STATIC_CAST(uint16_t, SIMDE_BUILTIN_16_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_16_, a)));
#else
uint16_t r;
uint16_t shift;
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(r);
r = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x00FF)) << 3); a >>= r;
shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x000F)) << 2); a >>= shift; r |= shift;
shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x0003)) << 1); a >>= shift; r |= shift;
r |= (a >> 1);
return ((8 * sizeof(r)) - 1) - r;
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_x_vclzs_u32(uint32_t a) {
#if \
defined(SIMDE_BUILTIN_SUFFIX_32_) && \
( \
SIMDE_BUILTIN_HAS_32_(clz) || \
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
)
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(a);
return HEDLEY_STATIC_CAST(uint32_t, SIMDE_BUILTIN_32_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_32_, a)));
#else
uint32_t r;
uint32_t shift;
if (HEDLEY_UNLIKELY(a == 0))
return 8 * sizeof(a);
r = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0xFFFF)) << 4); a >>= r;
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x00FF)) << 3); a >>= shift; r |= shift;
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x000F)) << 2); a >>= shift; r |= shift;
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x0003)) << 1); a >>= shift; r |= shift;
r |= (a >> 1);
return ((8 * sizeof(r)) - 1) - r;
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_x_vclzb_s8(int8_t a) {
return HEDLEY_STATIC_CAST(int8_t, simde_x_vclzb_u8(HEDLEY_STATIC_CAST(uint8_t, a)));
}
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_x_vclzh_s16(int16_t a) {
return HEDLEY_STATIC_CAST(int16_t, simde_x_vclzh_u16(HEDLEY_STATIC_CAST(uint16_t, a)));
}
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_x_vclzs_s32(int32_t a) {
return HEDLEY_STATIC_CAST(int32_t, simde_x_vclzs_u32(HEDLEY_STATIC_CAST(uint32_t, a)));
}
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vclz_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_s8(a);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzb_s8(a_.values[i]);
}
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_s8
#define vclz_s8(a) simde_vclz_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vclz_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_s16(a);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzh_s16(a_.values[i]);
}
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_s16
#define vclz_s16(a) simde_vclz_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vclz_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_s32(a);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzs_s32(a_.values[i]);
}
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_s32
#define vclz_s32(a) simde_vclz_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vclz_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_u8(a);
#else
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzb_u8(a_.values[i]);
}
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_u8
#define vclz_u8(a) simde_vclz_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vclz_u16(simde_uint16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_u16(a);
#else
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzh_u16(a_.values[i]);
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_u16
#define vclz_u16(a) simde_vclz_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclz_u32(simde_uint32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclz_u32(a);
#else
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzs_u32(a_.values[i]);
}
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclz_u32
#define vclz_u32(a) simde_vclz_u32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vclzq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_s8(a);
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
r_;
#if defined(SIMDE_X86_GFNI_NATIVE)
/* https://gist.github.com/animetosho/6cb732ccb5ecd86675ca0a442b3c0622 */
a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i);
r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
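/* How the GFNI sequence above works (per the gist): the first affine
 * transform bit-reverses each byte; _mm_add_epi8(x, 0xff) is a per-byte
 * x - 1, so the andnot keeps x & ~(x - 1), i.e. the lowest set bit,
 * which after the reversal corresponds to the highest set bit of the
 * original byte.  The second affine transform maps that one-hot byte to
 * its leading-zero count, with the immediate 8 supplying the result for
 * an all-zero byte. */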
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzb_s8(a_.values[i]);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_s8
#define vclzq_s8(a) simde_vclzq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vclzq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_s16(a);
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzh_s16(a_.values[i]);
}
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_s16
#define vclzq_s16(a) simde_vclzq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vclzq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_s32(a);
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzs_s32(a_.values[i]);
}
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_s32
#define vclzq_s32(a) simde_vclzq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vclzq_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_u8(a);
#else
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
r_;
#if defined(SIMDE_X86_GFNI_NATIVE)
a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i);
r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
#else
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzb_u8(a_.values[i]);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_u8
#define vclzq_u8(a) simde_vclzq_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vclzq_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_u16(a);
#else
simde_uint16x8_private
a_ = simde_uint16x8_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzh_u16(a_.values[i]);
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_u16
#define vclzq_u16(a) simde_vclzq_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vclzq_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vclzq_u32(a);
#else
simde_uint32x4_private
a_ = simde_uint32x4_to_private(a),
r_;
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_vclzs_u32(a_.values[i]);
}
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vclzq_u32
#define vclzq_u32(a) simde_vclzq_u32(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CLZ_H) */

View File

@@ -0,0 +1,132 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_CMLA_H)
#define SIMDE_ARM_NEON_CMLA_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vcmla_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmla_f32(r, a, b);
#else
simde_float32x2_private
r_ = simde_float32x2_to_private(r),
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] += b_.values[i] * a_.values[i & 2];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmla_f32
#define vcmla_f32(r, a, b) simde_vcmla_f32(r, a, b)
#endif
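/* The vcmla family treats each even/odd lane pair as one complex number
 * (even lane = real, odd lane = imaginary).  Plain vcmla accumulates only
 * the a.real partial products:
 *
 *   r.re += a.re * b.re;  r.im += a.re * b.im;
 *
 * (in the scalar loops, a_.values[i & 2] selects that even lane:
 * 0, 0, 2, 2 for i = 0..3), so a full complex multiply-accumulate is the
 * rot0 + rot90 pair.  A minimal sketch, assuming the simde NEON API from
 * this commit (illustrative only):
 *
 *   simde_float32x2_t acc = simde_vdup_n_f32(0.0f);
 *   acc = simde_vcmla_f32(acc, a, b);        // += (a.re*b.re, a.re*b.im)
 *   acc = simde_vcmla_rot90_f32(acc, a, b);  // += (-a.im*b.im, a.im*b.re)
 */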
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vcmlaq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_f32(r, a, b);
#else
simde_float32x4_private
r_ = simde_float32x4_to_private(r),
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] += b_.values[i] * a_.values[i & 2];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_f32
#define vcmlaq_f32(r, a, b) simde_vcmlaq_f32(r, a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vcmlaq_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_f64(r, a, b);
#else
simde_float64x2_private
r_ = simde_float64x2_to_private(r),
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] += b_.values[i] * a_.values[i & 2];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_f64
#define vcmlaq_f64(r, a, b) simde_vcmlaq_f64(r, a, b)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */

View File

@@ -0,0 +1,138 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_CMLA_ROT180_H)
#define SIMDE_ARM_NEON_CMLA_ROT180_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vcmla_rot180_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmla_rot180_f32(r, a, b);
#else
simde_float32x2_private
r_ = simde_float32x2_to_private(r),
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmla_rot180_f32
#define vcmla_rot180_f32(r, a, b) simde_vcmla_rot180_f32(r, a, b)
#endif
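/* Per complex lane pair, rot180 accumulates the negated a.real partial
 * products:
 *
 *   r.re += -(a.re * b.re);  r.im += -(a.re * b.im);
 *
 * i.e. the same as plain vcmla with a rotated by 180 degrees. */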
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vcmlaq_rot180_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot180_f32(r, a, b);
#else
simde_float32x4_private
r_ = simde_float32x4_to_private(r),
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, -b_.values, 0, 1, 2, 3);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot180_f32
#define vcmlaq_rot180_f32(r, a, b) simde_vcmlaq_rot180_f32(r, a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vcmlaq_rot180_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot180_f64(r, a, b);
#else
simde_float64x2_private
r_ = simde_float64x2_to_private(r),
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0);
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, -b_.values, 0, 1);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot180_f64
#define vcmlaq_rot180_f64(r, a, b) simde_vcmlaq_rot180_f64(r, a, b)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_H) */

View File

@@ -0,0 +1,138 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_H)
#define SIMDE_ARM_NEON_CMLA_ROT270_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vcmla_rot270_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmla_rot270_f32(r, a, b);
#else
simde_float32x2_private
r_ = simde_float32x2_to_private(r),
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmla_rot270_f32
#define vcmla_rot270_f32(r, a, b) simde_vcmla_rot270_f32(r, a, b)
#endif
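/* Per complex lane pair, rot270 accumulates
 *
 *   r.re += a.im * b.im;  r.im += -(a.im * b.re);
 *
 * so rot0 + rot270 together accumulate conj(a) * b:
 * (a.re*b.re + a.im*b.im, a.re*b.im - a.im*b.re). */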
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vcmlaq_rot270_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot270_f32(r, a, b);
#else
simde_float32x4_private
r_ = simde_float32x4_to_private(r),
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot270_f32
#define vcmlaq_rot270_f32(r, a, b) simde_vcmlaq_rot270_f32(r, a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vcmlaq_rot270_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot270_f64(r, a, b);
#else
simde_float64x2_private
r_ = simde_float64x2_to_private(r),
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1);
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot270_f64
#define vcmlaq_rot270_f64(r, a, b) simde_vcmlaq_rot270_f64(r, a, b)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) */

View File

@@ -0,0 +1,138 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_H)
#define SIMDE_ARM_NEON_CMLA_ROT90_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vcmla_rot90_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmla_rot90_f32(r, a, b);
#else
simde_float32x2_private
r_ = simde_float32x2_to_private(r),
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmla_rot90_f32
#define vcmla_rot90_f32(r, a, b) simde_vcmla_rot90_f32(r, a, b)
#endif
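/* Per complex lane pair, rot90 accumulates
 *
 *   r.re += -(a.im * b.im);  r.im += a.im * b.re;
 *
 * so rot0 + rot90 together accumulate the full complex product a * b
 * (see the sketch in cmla.h above). */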
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vcmlaq_rot90_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot90_f32(r, a, b);
#else
simde_float32x4_private
r_ = simde_float32x4_to_private(r),
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot90_f32
#define vcmlaq_rot90_f32(r, a, b) simde_vcmlaq_rot90_f32(r, a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vcmlaq_rot90_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
return vcmlaq_rot90_f64(r, a, b);
#else
simde_float64x2_private
r_ = simde_float64x2_to_private(r),
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_SHUFFLE_VECTOR_)
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1);
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2);
r_.values += b_.values * a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcmlaq_rot90_f64
#define vcmlaq_rot90_f64(r, a, b) simde_vcmlaq_rot90_f64(r, a, b)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) */

View File

@@ -0,0 +1,170 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_CNT_H)
#define SIMDE_ARM_NEON_CNT_H
#include "types.h"
#include "reinterpret.h"
#include <limits.h>
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_x_arm_neon_cntb(uint8_t v) {
v = v - ((v >> 1) & 0x55);
v = (v & 0x33) + ((v >> 2) & 0x33);
v = (v + (v >> 4)) & 0x0F;
return HEDLEY_STATIC_CAST(uint8_t, v) >> ((sizeof(uint8_t) - 1) * CHAR_BIT);
}
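/* Worked example of the SWAR reduction above, for v = 0xB4 (0b10110100,
 * four bits set; illustrative only):
 *
 *   v - ((v >> 1) & 0x55)           -> 0x64   (2-bit partial counts)
 *   (v & 0x33) + ((v >> 2) & 0x33)  -> 0x31   (4-bit partial counts)
 *   (v + (v >> 4)) & 0x0F           -> 0x04
 *
 * The final shift in the return statement is by
 * (sizeof(uint8_t) - 1) * CHAR_BIT = 0 bits, a no-op kept for symmetry
 * with the same pattern on wider types. */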
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vcnt_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcnt_s8(a);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i])));
}
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcnt_s8
#define vcnt_s8(a) simde_vcnt_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcnt_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcnt_u8(a);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_x_arm_neon_cntb(a_.values[i]);
}
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcnt_u8
#define vcnt_u8(a) simde_vcnt_u8((a))
#endif
/* The x86 implementations are stolen from
* https://github.com/WebAssembly/simd/pull/379. They could be cleaned
* up a bit if someone is bored; they're mostly just direct
* translations from the assembly. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vcntq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcntq_s8(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a)));
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE)
r_.m128i = _mm_popcnt_epi8(a_.m128i);
#elif defined(SIMDE_X86_AVX2_NATIVE)
__m128i tmp0 = _mm_set1_epi8(0x0f);
__m128i tmp1 = _mm_andnot_si128(tmp0, a_.m128i);
__m128i y = _mm_and_si128(tmp0, a_.m128i);
tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp1 = _mm_srli_epi16(tmp1, 4);
y = _mm_shuffle_epi8(tmp0, y);
tmp1 = _mm_shuffle_epi8(tmp0, tmp1);
r_.m128i = _mm_add_epi8(y, tmp1);
#elif defined(SIMDE_X86_SSSE3_NATIVE)
__m128i tmp0 = _mm_set1_epi8(0x0f);
__m128i tmp1 = a_.m128i;
tmp1 = _mm_and_si128(tmp1, tmp0);
tmp0 = _mm_andnot_si128(tmp0, a_.m128i);
__m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp0 = _mm_srli_epi16(tmp0, 4);
y = _mm_shuffle_epi8(y, tmp1);
tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
tmp1 = _mm_shuffle_epi8(tmp1, tmp0);
r_.m128i = _mm_add_epi8(y, tmp1);
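/* The AVX2 and SSSE3 paths above use _mm_shuffle_epi8 as a 16-entry
 * lookup table: each byte is split into its low and high nibbles, the
 * popcount of each nibble is looked up in the 4,3,3,2,... table, and the
 * two results are added per byte.  The SSE2 path below is the same SWAR
 * reduction as simde_x_arm_neon_cntb, applied to all lanes at once. */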
#elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i tmp = _mm_and_si128(_mm_srli_epi16(a_.m128i, 1), _mm_set1_epi8(0x55));
a_.m128i = _mm_sub_epi8(a_.m128i, tmp);
tmp = a_.m128i;
a_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x33));
tmp = _mm_and_si128(_mm_srli_epi16(tmp, 2), _mm_set1_epi8(0x33));
a_.m128i = _mm_add_epi8(a_.m128i, tmp);
tmp = _mm_srli_epi16(a_.m128i, 4);
a_.m128i = _mm_add_epi8(a_.m128i, tmp);
r_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x0f));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i])));
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcntq_s8
#define vcntq_s8(a) simde_vcntq_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcntq_u8(simde_uint8x16_t a) {
return simde_vreinterpretq_u8_s8(simde_vcntq_s8(simde_vreinterpretq_s8_u8(a)));
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcntq_u8
#define vcntq_u8(a) simde_vcntq_u8((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CNT_H) */

View File

@@ -0,0 +1,343 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_COMBINE_H)
#define SIMDE_ARM_NEON_COMBINE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_f32(low, high);
#else
simde_float32x4_private r_;
simde_float32x2_private
low_ = simde_float32x2_to_private(low),
high_ = simde_float32x2_to_private(high);
/* Note: __builtin_shufflevector allows the output to contain twice as
 * many elements as each input; __builtin_shuffle does not, so
 * SIMDE_SHUFFLE_VECTOR_ (which may expand to either builtin) would not
 * work here. */
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_f32
#define vcombine_f32(low, high) simde_vcombine_f32((low), (high))
#endif
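/* Usage sketch: widen two 64-bit halves into one 128-bit vector,
 * assuming the simde NEON API from this commit (illustrative only):
 *
 *   simde_float32x2_t lo = simde_vdup_n_f32(1.0f);
 *   simde_float32x2_t hi = simde_vdup_n_f32(2.0f);
 *   simde_float32x4_t v  = simde_vcombine_f32(lo, hi);  // {1, 1, 2, 2}
 */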
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcombine_f64(low, high);
#else
simde_float64x2_private r_;
simde_float64x1_private
low_ = simde_float64x1_to_private(low),
high_ = simde_float64x1_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcombine_f64
#define vcombine_f64(low, high) simde_vcombine_f64((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_s8(low, high);
#else
simde_int8x16_private r_;
simde_int8x8_private
low_ = simde_int8x8_to_private(low),
high_ = simde_int8x8_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_s8
#define vcombine_s8(low, high) simde_vcombine_s8((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_s16(low, high);
#else
simde_int16x8_private r_;
simde_int16x4_private
low_ = simde_int16x4_to_private(low),
high_ = simde_int16x4_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_s16
#define vcombine_s16(low, high) simde_vcombine_s16((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_s32(low, high);
#else
simde_int32x4_private r_;
simde_int32x2_private
low_ = simde_int32x2_to_private(low),
high_ = simde_int32x2_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_s32
#define vcombine_s32(low, high) simde_vcombine_s32((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_s64(low, high);
#else
simde_int64x2_private r_;
simde_int64x1_private
low_ = simde_int64x1_to_private(low),
high_ = simde_int64x1_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_s64
#define vcombine_s64(low, high) simde_vcombine_s64((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_u8(low, high);
#else
simde_uint8x16_private r_;
simde_uint8x8_private
low_ = simde_uint8x8_to_private(low),
high_ = simde_uint8x8_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_u8
#define vcombine_u8(low, high) simde_vcombine_u8((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_u16(low, high);
#else
simde_uint16x8_private r_;
simde_uint16x4_private
low_ = simde_uint16x4_to_private(low),
high_ = simde_uint16x4_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_u16
#define vcombine_u16(low, high) simde_vcombine_u16((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_u32(low, high);
#else
simde_uint32x4_private r_;
simde_uint32x2_private
low_ = simde_uint32x2_to_private(low),
high_ = simde_uint32x2_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_u32
#define vcombine_u32(low, high) simde_vcombine_u32((low), (high))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcombine_u64(low, high);
#else
simde_uint64x2_private r_;
simde_uint64x1_private
low_ = simde_uint64x1_to_private(low),
high_ = simde_uint64x1_to_private(high);
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
#else
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < halfway ; i++) {
r_.values[i] = low_.values[i];
r_.values[i + halfway] = high_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcombine_u64
#define vcombine_u64(low, high) simde_vcombine_u64((low), (high))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */

View File

@@ -0,0 +1,186 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
/* N.B. CM: vcreate_f16 and vcreate_bf16 are omitted as
* SIMDe has no 16-bit floating point support.
* Idem for the poly types. */
#if !defined(SIMDE_ARM_NEON_CREATE_H)
#define SIMDE_ARM_NEON_CREATE_H
#include "dup_n.h"
#include "reinterpret.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vcreate_s8(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_s8(a);
#else
return simde_vreinterpret_s8_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_s8
#define vcreate_s8(a) simde_vcreate_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vcreate_s16(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_s16(a);
#else
return simde_vreinterpret_s16_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_s16
#define vcreate_s16(a) simde_vcreate_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vcreate_s32(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_s32(a);
#else
return simde_vreinterpret_s32_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_s32
#define vcreate_s32(a) simde_vcreate_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vcreate_s64(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_s64(a);
#else
return simde_vreinterpret_s64_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_s64
#define vcreate_s64(a) simde_vcreate_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcreate_u8(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_u8(a);
#else
return simde_vreinterpret_u8_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_u8
#define vcreate_u8(a) simde_vcreate_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcreate_u16(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_u16(a);
#else
return simde_vreinterpret_u16_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_u16
#define vcreate_u16(a) simde_vcreate_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcreate_u32(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_u32(a);
#else
return simde_vreinterpret_u32_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_u32
#define vcreate_u32(a) simde_vcreate_u32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcreate_u64(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_u64(a);
#else
return simde_vdup_n_u64(a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_u64
#define vcreate_u64(a) simde_vcreate_u64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vcreate_f32(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vcreate_f32(a);
#else
return simde_vreinterpret_f32_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vcreate_f32
#define vcreate_f32(a) simde_vcreate_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vcreate_f64(uint64_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vcreate_f64(a);
#else
return simde_vreinterpret_f64_u64(simde_vdup_n_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcreate_f64
#define vcreate_f64(a) simde_vcreate_f64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_CREATE_H) */
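/* Editor's aside: vcreate reinterprets the 64 bits of its argument as
 * a 64-bit vector, lane 0 taking the least-significant bits on a
 * little-endian target. A usage sketch (illustration only):
 */
#if 0 /* example only, not compiled */
static void demo_create(void) {
  /* lanes are { 0x0201, 0x0403, 0x0605, 0x0807 } */
  simde_int16x4_t v = simde_vcreate_s16(UINT64_C(0x0807060504030201));
  (void) v;
}
#endif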

File diff suppressed because it is too large

View File

@@ -0,0 +1,171 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_DOT_H)
#define SIMDE_ARM_NEON_DOT_H
#include "types.h"
#include "add.h"
#include "combine.h"
#include "dup_n.h"
#include "get_low.h"
#include "get_high.h"
#include "paddl.h"
#include "movn.h"
#include "mull.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vdot_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
return vdot_s32(r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return simde_vadd_s32(r, simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(a, b)))));
#else
simde_int32x2_private r_;
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]);
}
r_.values[i] = acc;
}
return simde_vadd_s32(r, simde_int32x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_s32
#define vdot_s32(r, a, b) simde_vdot_s32((r), (a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vdot_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
return vdot_u32(r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return simde_vadd_u32(r, simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(a, b)))));
#else
simde_uint32x2_private r_;
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]);
}
r_.values[i] = acc;
}
return simde_vadd_u32(r, simde_uint32x2_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_u32
#define vdot_u32(r, a, b) simde_vdot_u32((r), (a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vdotq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
return vdotq_s32(r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return simde_vaddq_s32(r,
simde_vcombine_s32(simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_low_s8(a), simde_vget_low_s8(b))))),
simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_high_s8(a), simde_vget_high_s8(b)))))));
#else
simde_int32x4_private r_;
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]);
}
r_.values[i] = acc;
}
return simde_vaddq_s32(r, simde_int32x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_s32
#define vdotq_s32(r, a, b) simde_vdotq_s32((r), (a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vdotq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
return vdotq_u32(r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return simde_vaddq_u32(r,
simde_vcombine_u32(simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_low_u8(a), simde_vget_low_u8(b))))),
simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_high_u8(a), simde_vget_high_u8(b)))))));
#else
simde_uint32x4_private r_;
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]);
}
r_.values[i] = acc;
}
return simde_vaddq_u32(r, simde_uint32x4_from_private(r_));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_u32
#define vdotq_u32(r, a, b) simde_vdotq_u32((r), (a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_DOT_H) */
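/* Editor's aside: a scalar model of the vdot semantics above
 * (illustration only): output lane i accumulates the dot product of
 * the i-th group of four bytes of a and b. The NEON fallback computes
 * the same thing with a widening multiply (vmull) followed by two
 * pairwise-add steps (vpaddl) and a narrowing move (vmovn).
 */
#if 0 /* example only, not compiled */
static void demo_vdot_s32(int32_t r[2], const int8_t a[8], const int8_t b[8]) {
  for (int i = 0 ; i < 2 ; i++) {
    int32_t acc = 0;
    for (int j = 0 ; j < 4 ; j++) {
      acc += (int32_t) a[i * 4 + j] * (int32_t) b[i * 4 + j];
    }
    r[i] += acc; /* accumulate into the existing value, as vdot does */
  }
}
#endif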

View File

@@ -0,0 +1,491 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_DOT_LANE_H)
#define SIMDE_ARM_NEON_DOT_LANE_H
#include "types.h"
#include "add.h"
#include "dup_lane.h"
#include "paddl.h"
#include "movn.h"
#include "mull.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vdot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x2_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_2_(vdot_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x2_t
b_lane,
b_32 = vreinterpret_s32_s8(b);
SIMDE_CONSTIFY_2_(vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vadd_s32(
r,
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(a, vreinterpret_s8_s32(b_lane))
)
)
)
);
#else
simde_int32x2_private r_ = simde_int32x2_to_private(r);
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_int32x2_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_lane_s32
#define vdot_lane_s32(r, a, b, lane) simde_vdot_lane_s32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vdot_lane_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x2_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_2_(vdot_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x2_t
b_lane,
b_32 = vreinterpret_u32_u8(b);
SIMDE_CONSTIFY_2_(vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vadd_u32(
r,
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(a, vreinterpret_u8_u32(b_lane))
)
)
)
);
#else
simde_uint32x2_private r_ = simde_uint32x2_to_private(r);
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_uint32x2_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_lane_u32
#define vdot_lane_u32(r, a, b, lane) simde_vdot_lane_u32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vdot_laneq_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x16_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x2_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_4_(vdot_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x2_t b_lane;
simde_int32x4_t b_32 = vreinterpretq_s32_s8(b);
SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vadd_s32(
r,
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(a, vreinterpret_s8_s32(b_lane))
)
)
)
);
#else
simde_int32x2_private r_ = simde_int32x2_to_private(r);
simde_int8x8_private a_ = simde_int8x8_to_private(a);
simde_int8x16_private b_ = simde_int8x16_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_int32x2_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_laneq_s32
#define vdot_laneq_s32(r, a, b, lane) simde_vdot_laneq_s32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vdot_laneq_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x16_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x2_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_4_(vdot_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x2_t b_lane;
simde_uint32x4_t b_32 = vreinterpretq_u32_u8(b);
SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vadd_u32(
r,
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(a, vreinterpret_u8_u32(b_lane))
)
)
)
);
#else
simde_uint32x2_private r_ = simde_uint32x2_to_private(r);
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
simde_uint8x16_private b_ = simde_uint8x16_to_private(b);
for (int i = 0 ; i < 2 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_uint32x2_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdot_laneq_u32
#define vdot_laneq_u32(r, a, b, lane) simde_vdot_laneq_u32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vdotq_laneq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x4_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_4_(vdotq_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x4_t
b_lane,
b_32 = vreinterpretq_u32_u8(b);
SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vcombine_u32(
vadd_u32(
vget_low_u32(r),
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(vget_low_u8(a), vget_low_u8(vreinterpretq_u8_u32(b_lane)))
)
)
)
),
vadd_u32(
vget_high_u32(r),
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(vget_high_u8(a), vget_high_u8(vreinterpretq_u8_u32(b_lane)))
)
)
)
)
);
#else
simde_uint32x4_private r_ = simde_uint32x4_to_private(r);
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_uint32x4_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_laneq_u32
#define vdotq_laneq_u32(r, a, b, lane) simde_vdotq_laneq_u32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vdotq_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x4_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_4_(vdotq_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x4_t
b_lane,
b_32 = vreinterpretq_s32_s8(b);
SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vcombine_s32(
vadd_s32(
vget_low_s32(r),
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(vget_low_s8(a), vget_low_s8(vreinterpretq_s8_s32(b_lane)))
)
)
)
),
vadd_s32(
vget_high_s32(r),
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(vget_high_s8(a), vget_high_s8(vreinterpretq_s8_s32(b_lane)))
)
)
)
)
);
#else
simde_int32x4_private r_ = simde_int32x4_to_private(r);
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_int32x4_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_laneq_s32
#define vdotq_laneq_s32(r, a, b, lane) simde_vdotq_laneq_s32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vdotq_lane_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x4_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_2_(vdotq_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x2_t
b_lane,
b_32 = vreinterpret_u32_u8(b);
SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vcombine_u32(
vadd_u32(
vget_low_u32(r),
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(vget_low_u8(a), vreinterpret_u8_u32(b_lane))
)
)
)
),
vadd_u32(
vget_high_u32(r),
vmovn_u64(
vpaddlq_u32(
vpaddlq_u16(
vmull_u8(vget_high_u8(a), vreinterpret_u8_u32(b_lane))
)
)
)
)
);
#else
simde_uint32x4_private r_ = simde_uint32x4_to_private(r);
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
simde_uint8x8_private b_ = simde_uint8x8_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
uint32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_uint32x4_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_lane_u32
#define vdotq_lane_u32(r, a, b, lane) simde_vdotq_lane_u32((r), (a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vdotq_lane_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x4_t result;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
SIMDE_CONSTIFY_2_(vdotq_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x2_t
b_lane,
b_32 = vreinterpret_s32_s8(b);
SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
result =
vcombine_s32(
vadd_s32(
vget_low_s32(r),
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(vget_low_s8(a), vreinterpret_s8_s32(b_lane))
)
)
)
),
vadd_s32(
vget_high_s32(r),
vmovn_s64(
vpaddlq_s32(
vpaddlq_s16(
vmull_s8(vget_high_s8(a), vreinterpret_s8_s32(b_lane))
)
)
)
)
);
#else
simde_int32x4_private r_ = simde_int32x4_to_private(r);
simde_int8x16_private a_ = simde_int8x16_to_private(a);
simde_int8x8_private b_ = simde_int8x8_to_private(b);
for (int i = 0 ; i < 4 ; i++) {
int32_t acc = 0;
SIMDE_VECTORIZE_REDUCTION(+:acc)
for (int j = 0 ; j < 4 ; j++) {
const int idx_b = j + (lane << 2);
const int idx_a = j + (i << 2);
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
}
r_.values[i] += acc;
}
result = simde_int32x4_from_private(r_);
#endif
return result;
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
#undef vdotq_lane_s32
#define vdotq_lane_s32(r, a, b, lane) simde_vdotq_lane_s32((r), (a), (b), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_DOT_LANE_H) */
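/* Editor's aside: the *_lane variants differ from plain vdot only in
 * that every output lane reuses the single group of four bytes of b
 * selected by `lane`. A scalar model (illustration only):
 */
#if 0 /* example only, not compiled */
static void demo_vdot_lane_s32(int32_t r[2], const int8_t a[8],
                               const int8_t b[8], int lane) {
  for (int i = 0 ; i < 2 ; i++) {
    int32_t acc = 0;
    for (int j = 0 ; j < 4 ; j++) {
      /* a advances with the output lane; b is pinned to `lane` */
      acc += (int32_t) a[i * 4 + j] * (int32_t) b[lane * 4 + j];
    }
    r[i] += acc;
  }
}
#endif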

File diff suppressed because it is too large

View File

@@ -0,0 +1,674 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_DUP_N_H)
#define SIMDE_ARM_NEON_DUP_N_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vdup_n_f16(simde_float16 value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vdup_n_f16(value);
#else
simde_float16x4_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_float16x4_from_private(r_);
#endif
}
#define simde_vmov_n_f16 simde_vdup_n_f16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_f16
#define vdup_n_f16(value) simde_vdup_n_f16((value))
#undef vmov_n_f16
#define vmov_n_f16(value) simde_vmov_n_f16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vdup_n_f32(float value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_f32(value);
#else
simde_float32x2_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_float32x2_from_private(r_);
#endif
}
#define simde_vmov_n_f32 simde_vdup_n_f32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_f32
#define vdup_n_f32(value) simde_vdup_n_f32((value))
#undef vmov_n_f32
#define vmov_n_f32(value) simde_vmov_n_f32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vdup_n_f64(double value) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vdup_n_f64(value);
#else
simde_float64x1_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_float64x1_from_private(r_);
#endif
}
#define simde_vmov_n_f64 simde_vdup_n_f64
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vdup_n_f64
#define vdup_n_f64(value) simde_vdup_n_f64((value))
#undef vmov_n_f64
#define vmov_n_f64(value) simde_vmov_n_f64((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vdup_n_s8(int8_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_s8(value);
#else
simde_int8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi8(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#define simde_vmov_n_s8 simde_vdup_n_s8
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_s8
#define vdup_n_s8(value) simde_vdup_n_s8((value))
#undef vmov_n_s8
#define vmov_n_s8(value) simde_vmov_n_s8((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vdup_n_s16(int16_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_s16(value);
#else
simde_int16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi16(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#define simde_vmov_n_s16 simde_vdup_n_s16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_s16
#define vdup_n_s16(value) simde_vdup_n_s16((value))
#undef vmov_n_s16
#define vmov_n_s16(value) simde_vmov_n_s16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vdup_n_s32(int32_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_s32(value);
#else
simde_int32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi32(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#define simde_vmov_n_s32 simde_vdup_n_s32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_s32
#define vdup_n_s32(value) simde_vdup_n_s32((value))
#undef vmov_n_s32
#define vmov_n_s32(value) simde_vmov_n_s32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vdup_n_s64(int64_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_s64(value);
#else
simde_int64x1_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_int64x1_from_private(r_);
#endif
}
#define simde_vmov_n_s64 simde_vdup_n_s64
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_s64
#define vdup_n_s64(value) simde_vdup_n_s64((value))
#undef vmov_n_s64
#define vmov_n_s64(value) simde_vmov_n_s64((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vdup_n_u8(uint8_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_u8(value);
#else
simde_uint8x8_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi8(HEDLEY_STATIC_CAST(int8_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#define simde_vmov_n_u8 simde_vdup_n_u8
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_u8
#define vdup_n_u8(value) simde_vdup_n_u8((value))
#undef vmov_n_u8
#define vmov_n_u8(value) simde_vmov_n_u8((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vdup_n_u16(uint16_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_u16(value);
#else
simde_uint16x4_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#define simde_vmov_n_u16 simde_vdup_n_u16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_u16
#define vdup_n_u16(value) simde_vdup_n_u16((value))
#undef vmov_n_u16
#define vmov_n_u16(value) simde_vmov_n_u16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vdup_n_u32(uint32_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_u32(value);
#else
simde_uint32x2_private r_;
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#define simde_vmov_n_u32 simde_vdup_n_u32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_u32
#define vdup_n_u32(value) simde_vdup_n_u32((value))
#undef vmov_n_u32
#define vmov_n_u32(value) simde_vmov_n_u32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vdup_n_u64(uint64_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdup_n_u64(value);
#else
simde_uint64x1_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_uint64x1_from_private(r_);
#endif
}
#define simde_vmov_n_u64 simde_vdup_n_u64
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdup_n_u64
#define vdup_n_u64(value) simde_vdup_n_u64((value))
#undef vmov_n_u64
#define vmov_n_u64(value) simde_vmov_n_u64((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vdupq_n_f16(simde_float16 value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vdupq_n_f16(value);
#else
simde_float16x8_private r_;
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
return simde_float16x8_from_private(r_);
#endif
}
#define simde_vmovq_n_f16 simde_vdupq_n_f16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_f16
#define vdupq_n_f16(value) simde_vdupq_n_f16((value))
#undef vmovq_n_f16
#define vmovq_n_f16(value) simde_vmovq_n_f16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vdupq_n_f32(float value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_f32(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
(void) value;
return vec_splats(value);
#else
simde_float32x4_private r_;
#if defined(SIMDE_X86_SSE_NATIVE)
r_.m128 = _mm_set1_ps(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#define simde_vmovq_n_f32 simde_vdupq_n_f32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_f32
#define vdupq_n_f32(value) simde_vdupq_n_f32((value))
#undef vmovq_n_f32
#define vmovq_n_f32(value) simde_vmovq_n_f32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vdupq_n_f64(double value) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vdupq_n_f64(value);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
(void) value;
return vec_splats(value);
#else
simde_float64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128d = _mm_set1_pd(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#define simde_vmovq_n_f64 simde_vdupq_n_f64
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_f64
#define vdupq_n_f64(value) simde_vdupq_n_f64((value))
#undef vmovq_n_f64
#define vmovq_n_f64(value) simde_vmovq_n_f64((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vdupq_n_s8(int8_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_s8(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_int8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi8(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#define simde_vmovq_n_s8 simde_vdupq_n_s8
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_s8
#define vdupq_n_s8(value) simde_vdupq_n_s8((value))
#undef vmovq_n_s8
#define vmovq_n_s8(value) simde_vmovq_n_s8((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vdupq_n_s16(int16_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_s16(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_int16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi16(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#define simde_vmovq_n_s16 simde_vdupq_n_s16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_s16
#define vdupq_n_s16(value) simde_vdupq_n_s16((value))
#undef vmovq_n_s16
#define vmovq_n_s16(value) simde_vmovq_n_s16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vdupq_n_s32(int32_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_s32(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_int32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi32(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#define simde_vmovq_n_s32 simde_vdupq_n_s32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_s32
#define vdupq_n_s32(value) simde_vdupq_n_s32((value))
#undef vmovq_n_s32
#define vmovq_n_s32(value) simde_vmovq_n_s32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vdupq_n_s64(int64_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_s64(value);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(HEDLEY_STATIC_CAST(signed long long, value));
#else
simde_int64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
r_.m128i = _mm_set1_epi64x(value);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_splat(value);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#define simde_vmovq_n_s64 simde_vdupq_n_s64
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_s64
#define vdupq_n_s64(value) simde_vdupq_n_s64((value))
#undef vmovq_n_s64
#define vmovq_n_s64(value) simde_vmovq_n_s64((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vdupq_n_u8(uint8_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_u8(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_uint8x16_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#define simde_vmovq_n_u8 simde_vdupq_n_u8
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_u8
#define vdupq_n_u8(value) simde_vdupq_n_u8((value))
#undef vmovq_n_u8
#define vmovq_n_u8(value) simde_vmovq_n_u8((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vdupq_n_u16(uint16_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_u16(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_uint16x8_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#define simde_vmovq_n_u16 simde_vdupq_n_u16
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_u16
#define vdupq_n_u16(value) simde_vdupq_n_u16((value))
#undef vmovq_n_u16
#define vmovq_n_u16(value) simde_vmovq_n_u16((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vdupq_n_u32(uint32_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_u32(value);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(value);
#else
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#define simde_vmovq_n_u32 simde_vdupq_n_u32
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_u32
#define vdupq_n_u32(value) simde_vdupq_n_u32((value))
#undef vmovq_n_u32
#define vmovq_n_u32(value) simde_vmovq_n_u32((value))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vdupq_n_u64(uint64_t value) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vdupq_n_u64(value);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value));
#else
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
r_.m128i = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, value));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = value;
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#define simde_vmovq_n_u64 simde_vdupq_n_u64
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vdupq_n_u64
#define vdupq_n_u64(value) simde_vdupq_n_u64((value))
#undef vmovq_n_u64
#define vmovq_n_u64(value) simde_vmovq_n_u64((value))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_DUP_N_H) */
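/* Editor's aside: vdup_n/vmov_n broadcast one scalar into every lane.
 * A usage sketch (illustration only):
 */
#if 0 /* example only, not compiled */
static void demo_dup(void) {
  simde_int32x4_t ones = simde_vdupq_n_s32(1);             /* { 1, 1, 1, 1 } */
  simde_uint8x16_t mask = simde_vdupq_n_u8(UINT8_C(0x80)); /* sixteen 0x80 bytes */
  (void) ones;
  (void) mask;
}
#endif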

View File

@@ -0,0 +1,552 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_EOR_H)
#define SIMDE_ARM_NEON_EOR_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_veor_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_s8(a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_s8
#define veor_s8(a, b) simde_veor_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_veor_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_s16(a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_s16
#define veor_s16(a, b) simde_veor_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_veor_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_s32(a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_s32
#define veor_s32(a, b) simde_veor_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_veor_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_s64(a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_s64
#define veor_s64(a, b) simde_veor_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_veor_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_u8
#define veor_u8(a, b) simde_veor_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_veor_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_u16
#define veor_u16(a, b) simde_veor_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_veor_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_u32
#define veor_u32(a, b) simde_veor_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_veor_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veor_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veor_u64
#define veor_u64(a, b) simde_veor_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_veorq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_s8
#define veorq_s8(a, b) simde_veorq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_veorq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_s16
#define veorq_s16(a, b) simde_veorq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_veorq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_s32
#define veorq_s32(a, b) simde_veorq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_veorq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_xor(a, b);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_s64
#define veorq_s64(a, b) simde_veorq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_veorq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_u8
#define veorq_u8(a, b) simde_veorq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_veorq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_u16
#define veorq_u16(a, b) simde_veorq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_veorq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_xor(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_u32
#define veorq_u32(a, b) simde_veorq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_veorq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return veorq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_xor(a, b);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values ^ b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] ^ b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef veorq_u64
#define veorq_u64(a, b) simde_veorq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_EOR_H) */
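As a quick illustration of the XOR wrappers above, here is a minimal,
hypothetical caller (the scaffolding is illustrative and assumes the
simde headers are on the include path; simde_vld1q_u32/simde_vst1q_u32
come from the companion load/store headers):

#include <simde/arm/neon.h>
#include <inttypes.h>
#include <stdio.h>

int main(void) {
uint32_t a_buf[4] = { 0xFFFFFFFFu, 0x00000000u, 0x12345678u, 0xA5A5A5A5u };
uint32_t b_buf[4] = { 0x0000000Fu, 0x00000000u, 0x12345678u, 0x5A5A5A5Au };
simde_uint32x4_t a = simde_vld1q_u32(a_buf);
simde_uint32x4_t b = simde_vld1q_u32(b_buf);
simde_uint32x4_t r = simde_veorq_u32(a, b); /* lanewise a ^ b */
uint32_t out[4];
simde_vst1q_u32(out, r);
for (int i = 0 ; i < 4 ; i++) {
printf("%08" PRIx32 "\n", out[i]); /* ffffff0f, 00000000, 00000000, ffffffff */
}
return 0;
}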

View File

@@ -0,0 +1,796 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_EXT_H)
#define SIMDE_ARM_NEON_EXT_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
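/* vext treats the pair (a, b) as one concatenated vector and extracts a
 * full-width window starting at lane n: lane i of the result is
 * a.values[i + n] while i + n is still inside a, and wraps into the low
 * lanes of b afterwards (the `src & mask` in the fallback loops below
 * performs exactly that wrap). */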
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_float32x2_t r;
SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_float32x2_private
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_f32(a, b, n) simde_float32x2_from_m64(_mm_alignr_pi8(simde_float32x2_to_m64(b), simde_float32x2_to_m64(a), n * sizeof(simde_float32)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_f32(a, b, n) (__extension__ ({ \
simde_float32x2_private simde_vext_f32_r_; \
simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_float32x2_from_private(simde_vext_f32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_f32
#define vext_f32(a, b, n) simde_vext_f32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
(void) n;
return vext_f64(a, b, 0);
#else
simde_float64x1_private
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_f64(a, b, n) simde_float64x1_from_m64(_mm_alignr_pi8(simde_float64x1_to_m64(b), simde_float64x1_to_m64(a), n * sizeof(simde_float64)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vext_f64(a, b, n) (__extension__ ({ \
simde_float64x1_private simde_vext_f64_r_; \
simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, (n))); \
simde_float64x1_from_private(simde_vext_f64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vext_f64
#define vext_f64(a, b, n) simde_vext_f64((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int8x8_t r;
SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
}
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_s8(a, b, n) simde_int8x8_from_m64(_mm_alignr_pi8(simde_int8x8_to_m64(b), simde_int8x8_to_m64(a), n * sizeof(int8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_s8(a, b, n) (__extension__ ({ \
simde_int8x8_private simde_vext_s8_r_; \
simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
simde_int8x8_from_private(simde_vext_s8_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_s8
#define vext_s8(a, b, n) simde_vext_s8((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int16x4_t r;
SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
}
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_s16(a, b, n) simde_int16x4_from_m64(_mm_alignr_pi8(simde_int16x4_to_m64(b), simde_int16x4_to_m64(a), n * sizeof(int16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_s16(a, b, n) (__extension__ ({ \
simde_int16x4_private simde_vext_s16_r_; \
simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
simde_int16x4_from_private(simde_vext_s16_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_s16
#define vext_s16(a, b, n) simde_vext_s16((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x2_t r;
SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_s32(a, b, n) simde_int32x2_from_m64(_mm_alignr_pi8(simde_int32x2_to_m64(b), simde_int32x2_to_m64(a), n * sizeof(int32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_s32(a, b, n) (__extension__ ({ \
simde_int32x2_private simde_vext_s32_r_; \
simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_int32x2_from_private(simde_vext_s32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_s32
#define vext_s32(a, b, n) simde_vext_s32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
(void) n;
return vext_s64(a, b, 0);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
}
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_s64(a, b, n) simde_int64x1_from_m64(_mm_alignr_pi8(simde_int64x1_to_m64(b), simde_int64x1_to_m64(a), n * sizeof(int64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vext_s64(a, b, n) (__extension__ ({ \
simde_int64x1_private simde_vext_s64_r_; \
simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \
simde_int64x1_from_private(simde_vext_s64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_s64
#define vext_s64(a, b, n) simde_vext_s64((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint8x8_t r;
SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
}
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_u8(a, b, n) simde_uint8x8_from_m64(_mm_alignr_pi8(simde_uint8x8_to_m64(b), simde_uint8x8_to_m64(a), n * sizeof(uint8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_u8(a, b, n) (__extension__ ({ \
simde_uint8x8_private simde_vext_u8_r_; \
simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
simde_uint8x8_from_private(simde_vext_u8_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_u8
#define vext_u8(a, b, n) simde_vext_u8((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint16x4_t r;
SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
}
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_u16(a, b, n) simde_uint16x4_from_m64(_mm_alignr_pi8(simde_uint16x4_to_m64(b), simde_uint16x4_to_m64(a), n * sizeof(uint16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_u16(a, b, n) (__extension__ ({ \
simde_uint16x4_private simde_vext_u16_r_; \
simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
simde_uint16x4_from_private(simde_vext_u16_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_u16
#define vext_u16(a, b, n) simde_vext_u16((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x2_t r;
SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_u32(a, b, n) simde_uint32x2_from_m64(_mm_alignr_pi8(simde_uint32x2_to_m64(b), simde_uint32x2_to_m64(a), n * sizeof(uint32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
#define simde_vext_u32(a, b, n) (__extension__ ({ \
simde_uint32x2_private simde_vext_u32_r_; \
simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_uint32x2_from_private(simde_vext_u32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_u32
#define vext_u32(a, b, n) simde_vext_u32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
(void) n;
return vext_u64(a, b, 0);
#else
simde_uint64x1_private
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
}
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vext_u64(a, b, n) simde_uint64x1_from_m64(_mm_alignr_pi8(simde_uint64x1_to_m64(b), simde_uint64x1_to_m64(a), n * sizeof(uint64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vext_u64(a, b, n) (__extension__ ({ \
simde_uint64x1_private simde_vext_u64_r_; \
simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \
simde_uint64x1_from_private(simde_vext_u64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vext_u64
#define vext_u64(a, b, n) simde_vext_u64((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_float32x4_t r;
SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_float32x4_private
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
}
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_f32(a, b, n) simde_float32x4_from_m128(_mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(simde_float32x4_to_m128(b)), _mm_castps_si128(simde_float32x4_to_m128(a)), n * sizeof(simde_float32))))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_f32(a, b, n) (__extension__ ({ \
simde_float32x4_private simde_vextq_f32_r_; \
simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
simde_float32x4_from_private(simde_vextq_f32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_f32
#define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
simde_float64x2_t r;
SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_float64x2_private
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_f64(a, b, n) simde_float64x2_from_m128d(_mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(simde_float64x2_to_m128d(b)), _mm_castpd_si128(simde_float64x2_to_m128d(a)), n * sizeof(simde_float64))))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_f64(a, b, n) (__extension__ ({ \
simde_float64x2_private simde_vextq_f64_r_; \
simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_float64x2_from_private(simde_vextq_f64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vextq_f64
#define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int8x16_t r;
SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
}
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_s8(a, b, n) simde_int8x16_from_m128i(_mm_alignr_epi8(simde_int8x16_to_m128i(b), simde_int8x16_to_m128i(a), n * sizeof(int8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_s8(a, b, n) (__extension__ ({ \
simde_int8x16_private simde_vextq_s8_r_; \
simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \
simde_int8x16_from_private(simde_vextq_s8_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_s8
#define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int16x8_t r;
SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
}
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_s16(a, b, n) simde_int16x8_from_m128i(_mm_alignr_epi8(simde_int16x8_to_m128i(b), simde_int16x8_to_m128i(a), n * sizeof(int16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_s16(a, b, n) (__extension__ ({ \
simde_int16x8_private simde_vextq_s16_r_; \
simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
simde_int16x8_from_private(simde_vextq_s16_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_s16
#define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int32x4_t r;
SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
}
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_s32(a, b, n) simde_int32x4_from_m128i(_mm_alignr_epi8(simde_int32x4_to_m128i(b), simde_int32x4_to_m128i(a), n * sizeof(int32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_s32(a, b, n) (__extension__ ({ \
simde_int32x4_private simde_vextq_s32_r_; \
simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
simde_int32x4_from_private(simde_vextq_s32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_s32
#define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_int64x2_t r;
SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_s64(a, b, n) simde_int64x2_from_m128i(_mm_alignr_epi8(simde_int64x2_to_m128i(b), simde_int64x2_to_m128i(a), n * sizeof(int64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_s64(a, b, n) (__extension__ ({ \
simde_int64x2_private simde_vextq_s64_r_; \
simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_int64x2_from_private(simde_vextq_s64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_s64
#define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint8x16_t r;
SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
}
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_u8(a, b, n) simde_uint8x16_from_m128i(_mm_alignr_epi8(simde_uint8x16_to_m128i(b), simde_uint8x16_to_m128i(a), n * sizeof(uint8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_u8(a, b, n) (__extension__ ({ \
simde_uint8x16_private simde_vextq_u8_r_; \
simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \
simde_uint8x16_from_private(simde_vextq_u8_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_u8
#define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint16x8_t r;
SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint16x8_private
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
}
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_u16(a, b, n) simde_uint16x8_from_m128i(_mm_alignr_epi8(simde_uint16x8_to_m128i(b), simde_uint16x8_to_m128i(a), n * sizeof(uint16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_u16(a, b, n) (__extension__ ({ \
simde_uint16x8_private simde_vextq_u16_r_; \
simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
simde_uint16x8_from_private(simde_vextq_u16_r_); \
}))
#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
#define simde_vextq_u16(a, b, n) (__extension__ ({ \
simde_uint16x8_private simde_vextq_u16_r_; \
simde_vextq_u16_r_.values = __builtin_shufflevector( \
simde_uint16x8_to_private(a).values, \
simde_uint16x8_to_private(b).values, \
(n) + 0, (n) + 1, (n) + 2, (n) + 3, (n) + 4, (n) + 5, (n) + 6, (n) + 7); \
simde_uint16x8_from_private(simde_vextq_u16_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_u16
#define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint32x4_t r;
SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint32x4_private
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
}
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_u32(a, b, n) simde_uint32x4_from_m128i(_mm_alignr_epi8(simde_uint32x4_to_m128i(b), simde_uint32x4_to_m128i(a), n * sizeof(uint32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_u32(a, b, n) (__extension__ ({ \
simde_uint32x4_private simde_vextq_u32_r_; \
simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
simde_uint32x4_from_private(simde_vextq_u32_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_u32
#define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
simde_uint64x2_t r;
SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
return r;
#else
simde_uint64x2_private
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b),
r_ = a_;
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
size_t src = i + n_;
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
}
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
#define simde_vextq_u64(a, b, n) simde_uint64x2_from_m128i(_mm_alignr_epi8(simde_uint64x2_to_m128i(b), simde_uint64x2_to_m128i(a), n * sizeof(uint64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
#define simde_vextq_u64(a, b, n) (__extension__ ({ \
simde_uint64x2_private simde_vextq_u64_r_; \
simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
simde_uint64x2_from_private(simde_vextq_u64_r_); \
}))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vextq_u64
#define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */
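To make the windowing concrete, a small hypothetical sketch (the buffers
and main are illustrative; the intrinsic names come from the header
above, and the lane offset must be a compile-time constant):

#include <simde/arm/neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
uint8_t a_buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
uint8_t b_buf[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
simde_uint8x8_t a = simde_vld1_u8(a_buf);
simde_uint8x8_t b = simde_vld1_u8(b_buf);
/* n = 3: lanes 3..7 of a followed by lanes 0..2 of b */
simde_uint8x8_t r = simde_vext_u8(a, b, 3);
uint8_t out[8];
simde_vst1_u8(out, r);
for (int i = 0 ; i < 8 ; i++) {
printf("%d ", out[i]); /* prints: 3 4 5 6 7 8 9 10 */
}
printf("\n");
return 0;
}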

View File

@@ -0,0 +1,126 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_FMA_H)
#define SIMDE_ARM_NEON_FMA_H
#include "add.h"
#include "mul.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
return vfma_f32(a, b, c);
#else
return simde_vadd_f32(a, simde_vmul_f32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vfma_f32
#define vfma_f32(a, b, c) simde_vfma_f32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
return vfma_f64(a, b, c);
#else
return simde_vadd_f64(a, simde_vmul_f64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_f64
#define vfma_f64(a, b, c) simde_vfma_f64(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
return vfmaq_f32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_madd(b, c, a);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b),
c_ = simde_float32x4_to_private(c);
#if defined(SIMDE_X86_FMA_NATIVE)
r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128);
#endif
return simde_float32x4_from_private(r_);
#else
return simde_vaddq_f32(a, simde_vmulq_f32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vfmaq_f32
#define vfmaq_f32(a, b, c) simde_vfmaq_f32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vfmaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
return vfmaq_f64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_madd(b, c, a);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b),
c_ = simde_float64x2_to_private(c);
#if defined(SIMDE_X86_FMA_NATIVE)
r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d);
#endif
return simde_float64x2_from_private(r_);
#else
return simde_vaddq_f64(a, simde_vmulq_f64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_f64
#define vfmaq_f64(a, b, c) simde_vfmaq_f64(a, b, c)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_FMA_H) */
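One caveat worth noting: the portable fallback computes a + (b * c) with
two rounding steps, whereas vfmaq_f32 and _mm_fmadd_ps round once, so
results may differ in the last ulp between code paths. A minimal,
hypothetical caller (scaffolding illustrative, assuming the simde
headers are on the include path):

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
simde_float32x4_t a = simde_vdupq_n_f32(1.0f);
simde_float32x4_t b = simde_vdupq_n_f32(2.0f);
simde_float32x4_t c = simde_vdupq_n_f32(3.0f);
/* r[i] = a[i] + b[i] * c[i], fused where the target has FMA */
simde_float32x4_t r = simde_vfmaq_f32(a, b, c);
printf("%f\n", simde_vgetq_lane_f32(r, 0)); /* 7.000000 */
return 0;
}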

View File

@@ -0,0 +1,225 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_FMA_LANE_H)
#define SIMDE_ARM_NEON_FMA_LANE_H
#include "add.h"
#include "dup_n.h"
#include "get_lane.h"
#include "mul.h"
#include "mul_lane.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
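/* Naming follows the ARM convention: the `s`/`d` suffixed forms
 * (vfmas_*, vfmad_*) operate on scalar float32/float64 values, `lane`
 * indexes a 64-bit multiplier vector while `laneq` indexes a 128-bit
 * one, and the q-prefixed forms produce 128-bit results. Every wrapper
 * below falls back to dup_n/mul_lane/add arithmetic when hardware FMA
 * is unavailable. */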
/* simde_vfmad_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmad_lane_f64(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_lane_f64(a, b, v, lane))
#else
#define simde_vfmad_lane_f64(a, b, v, lane) vfmad_lane_f64((a), (b), (v), (lane))
#endif
#else
#define simde_vfmad_lane_f64(a, b, v, lane) \
simde_vget_lane_f64( \
simde_vadd_f64( \
simde_vdup_n_f64(a), \
simde_vdup_n_f64(simde_vmuld_lane_f64(b, v, lane)) \
), \
0 \
)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmad_lane_f64
#define vfmad_lane_f64(a, b, v, lane) simde_vfmad_lane_f64(a, b, v, lane)
#endif
/* simde_vfmad_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmad_laneq_f64(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_laneq_f64(a, b, v, lane))
#else
#define simde_vfmad_laneq_f64(a, b, v, lane) vfmad_laneq_f64((a), (b), (v), (lane))
#endif
#else
#define simde_vfmad_laneq_f64(a, b, v, lane) \
simde_vget_lane_f64( \
simde_vadd_f64( \
simde_vdup_n_f64(a), \
simde_vdup_n_f64(simde_vmuld_laneq_f64(b, v, lane)) \
), \
0 \
)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmad_laneq_f64
#define vfmad_laneq_f64(a, b, v, lane) simde_vfmad_laneq_f64(a, b, v, lane)
#endif
/* simde_vfmas_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmas_lane_f32(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_lane_f32(a, b, v, lane))
#else
#define simde_vfmas_lane_f32(a, b, v, lane) vfmas_lane_f32((a), (b), (v), (lane))
#endif
#else
#define simde_vfmas_lane_f32(a, b, v, lane) \
simde_vget_lane_f32( \
simde_vadd_f32( \
simde_vdup_n_f32(a), \
simde_vdup_n_f32(simde_vmuls_lane_f32(b, v, lane)) \
), \
0 \
)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmas_lane_f32
#define vfmas_lane_f32(a, b, v, lane) simde_vfmas_lane_f32(a, b, v, lane)
#endif
/* simde_vfmas_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmas_laneq_f32(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_laneq_f32(a, b, v, lane))
#else
#define simde_vfmas_laneq_f32(a, b, v, lane) vfmas_laneq_f32((a), (b), (v), (lane))
#endif
#else
#define simde_vfmas_laneq_f32(a, b, v, lane) \
simde_vget_lane_f32( \
simde_vadd_f32( \
simde_vdup_n_f32(a), \
simde_vdup_n_f32(simde_vmuls_laneq_f32(b, v, lane)) \
), \
0 \
)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmas_laneq_f32
#define vfmas_laneq_f32(a, b, v, lane) simde_vfmas_laneq_f32(a, b, v, lane)
#endif
/* simde_vfma_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane)
#else
#define simde_vfma_lane_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_lane_f32(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_lane_f32
#define vfma_lane_f32(a, b, v, lane) simde_vfma_lane_f32(a, b, v, lane)
#endif
/* simde_vfma_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfma_lane_f64(a, b, v, lane) vfma_lane_f64((a), (b), (v), (lane))
#else
#define simde_vfma_lane_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_lane_f64(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_lane_f64
#define vfma_lane_f64(a, b, v, lane) simde_vfma_lane_f64(a, b, v, lane)
#endif
/* simde_vfma_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane))
#else
#define simde_vfma_laneq_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_laneq_f32(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_laneq_f32
#define vfma_laneq_f32(a, b, v, lane) simde_vfma_laneq_f32(a, b, v, lane)
#endif
/* simde_vfma_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfma_laneq_f64(a, b, v, lane) vfma_laneq_f64((a), (b), (v), (lane))
#else
#define simde_vfma_laneq_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_laneq_f64(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_laneq_f64
#define vfma_laneq_f64(a, b, v, lane) simde_vfma_laneq_f64(a, b, v, lane)
#endif
/* simde_vfmaq_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfmaq_lane_f64(a, b, v, lane) vfmaq_lane_f64((a), (b), (v), (lane))
#else
#define simde_vfmaq_lane_f64(a, b, v, lane) simde_vaddq_f64(a, simde_vmulq_lane_f64(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_lane_f64
#define vfmaq_lane_f64(a, b, v, lane) simde_vfmaq_lane_f64(a, b, v, lane)
#endif
/* simde_vfmaq_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane))
#else
#define simde_vfmaq_lane_f32(a, b, v, lane) simde_vaddq_f32(a, simde_vmulq_lane_f32(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_lane_f32
#define vfmaq_lane_f32(a, b, v, lane) simde_vfmaq_lane_f32(a, b, v, lane)
#endif
/* simde_vfmaq_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane))
#else
#define simde_vfmaq_laneq_f32(a, b, v, lane) \
simde_vaddq_f32(a, simde_vmulq_laneq_f32(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_laneq_f32
#define vfmaq_laneq_f32(a, b, v, lane) simde_vfmaq_laneq_f32(a, b, v, lane)
#endif
/* simde_vfmaq_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#define simde_vfmaq_laneq_f64(a, b, v, lane) vfmaq_laneq_f64((a), (b), (v), (lane))
#else
#define simde_vfmaq_laneq_f64(a, b, v, lane) \
simde_vaddq_f64(a, simde_vmulq_laneq_f64(b, v, lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_laneq_f64
#define vfmaq_laneq_f64(a, b, v, lane) simde_vfmaq_laneq_f64(a, b, v, lane)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_FMA_LANE_H) */
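/* Usage sketch (hypothetical helper, not part of simde; assumes the
 * umbrella <simde/arm/neon.h> header is what callers include): compute
 * acc[i] + x[i] * coeffs[1], fused on targets with native FMA and
 * falling back to the add+mul macros above elsewhere. The lane index
 * must be an integer constant expression. */
static simde_float32x4_t
example_fma_lane(simde_float32x4_t acc, simde_float32x4_t x,
                 simde_float32x2_t coeffs) {
  return simde_vfmaq_lane_f32(acc, x, coeffs, 1); /* lane range is 0..1 */
}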

View File

@@ -0,0 +1,97 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_FMA_N_H)
#define SIMDE_ARM_NEON_FMA_N_H
#include "types.h"
#include "dup_n.h"
#include "fma.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
return vfma_n_f32(a, b, c);
#else
return simde_vfma_f32(a, b, simde_vdup_n_f32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vfma_n_f32
#define vfma_n_f32(a, b, c) simde_vfma_n_f32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vfma_n_f64(a, b, c);
#else
return simde_vfma_f64(a, b, simde_vdup_n_f64(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfma_n_f64
#define vfma_n_f64(a, b, c) simde_vfma_n_f64(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
return vfmaq_n_f32(a, b, c);
#else
return simde_vfmaq_f32(a, b, simde_vdupq_n_f32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vfmaq_n_f32
#define vfmaq_n_f32(a, b, c) simde_vfmaq_n_f32(a, b, c)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vfmaq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vfmaq_n_f64(a, b, c);
#else
return simde_vfmaq_f64(a, b, simde_vdupq_n_f64(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vfmaq_n_f64
#define vfmaq_n_f64(a, b, c) simde_vfmaq_n_f64(a, b, c)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_FMA_N_H) */
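/* Usage sketch (hypothetical helper, not part of simde): a SAXPY-style
 * step, y[i] + a * x[i]; on the portable path the scalar is broadcast
 * with vdupq_n before the fused multiply-add, as shown above. */
static simde_float32x4_t
example_axpy(simde_float32x4_t y, simde_float32x4_t x, simde_float32_t a) {
  return simde_vfmaq_n_f32(y, x, a);
}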

View File

@@ -0,0 +1,300 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_GET_HIGH_H)
#define SIMDE_ARM_NEON_GET_HIGH_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vget_high_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_f32(a);
#else
simde_float32x2_private r_;
simde_float32x4_private a_ = simde_float32x4_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_f32
#define vget_high_f32(a) simde_vget_high_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vget_high_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vget_high_f64(a);
#else
simde_float64x1_private r_;
simde_float64x2_private a_ = simde_float64x2_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vget_high_f64
#define vget_high_f64(a) simde_vget_high_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vget_high_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_s8(a);
#else
simde_int8x8_private r_;
simde_int8x16_private a_ = simde_int8x16_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14, 15);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_s8
#define vget_high_s8(a) simde_vget_high_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vget_high_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_s16(a);
#else
simde_int16x4_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_s16
#define vget_high_s16(a) simde_vget_high_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vget_high_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_s32(a);
#else
simde_int32x2_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_s32
#define vget_high_s32(a) simde_vget_high_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vget_high_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_s64(a);
#else
simde_int64x1_private r_;
simde_int64x2_private a_ = simde_int64x2_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_s64
#define vget_high_s64(a) simde_vget_high_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vget_high_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_u8(a);
#else
simde_uint8x8_private r_;
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14, 15);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_u8
#define vget_high_u8(a) simde_vget_high_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vget_high_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_u16(a);
#else
simde_uint16x4_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_u16
#define vget_high_u16(a) simde_vget_high_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vget_high_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_u32(a);
#else
simde_uint32x2_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_u32
#define vget_high_u32(a) simde_vget_high_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vget_high_u64(simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_high_u64(a);
#else
simde_uint64x1_private r_;
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_high_u64
#define vget_high_u64(a) simde_vget_high_u64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_GET_HIGH_H) */
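/* Usage sketch (hypothetical helper, not part of simde): split a
 * 128-bit register into its 64-bit halves; vget_high_* returns the
 * upper lanes (8..15 for the s8 variant), via __builtin_shufflevector
 * where available and an element copy loop otherwise. */
static simde_int8x8_t
example_upper_half(simde_int8x16_t v) {
  return simde_vget_high_s8(v);
}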

View File

@@ -0,0 +1,519 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_GET_LANE_H)
#define SIMDE_ARM_NEON_GET_LANE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vget_lane_f32(simde_float32x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_2_(vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v);
#else
simde_float32x2_private v_ = simde_float32x2_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_f32
#define vget_lane_f32(v, lane) simde_vget_lane_f32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vget_lane_f64(simde_float64x1_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_float64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
(void) lane;
return vget_lane_f64(v, 0);
#else
simde_float64x1_private v_ = simde_float64x1_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vget_lane_f64
#define vget_lane_f64(v, lane) simde_vget_lane_f64((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vget_lane_s8(simde_int8x8_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_8_(vget_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v);
#else
simde_int8x8_private v_ = simde_int8x8_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_s8
#define vget_lane_s8(v, lane) simde_vget_lane_s8((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vget_lane_s16(simde_int16x4_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_4_(vget_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v);
#else
simde_int16x4_private v_ = simde_int16x4_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_s16
#define vget_lane_s16(v, lane) simde_vget_lane_s16((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vget_lane_s32(simde_int32x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_2_(vget_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v);
#else
simde_int32x2_private v_ = simde_int32x2_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_s32
#define vget_lane_s32(v, lane) simde_vget_lane_s32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vget_lane_s64(simde_int64x1_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
int64_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
(void) lane;
return vget_lane_s64(v, 0);
#else
simde_int64x1_private v_ = simde_int64x1_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_s64
#define vget_lane_s64(v, lane) simde_vget_lane_s64((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vget_lane_u8(simde_uint8x8_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_8_(vget_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v);
#else
simde_uint8x8_private v_ = simde_uint8x8_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_u8
#define vget_lane_u8(v, lane) simde_vget_lane_u8((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vget_lane_u16(simde_uint16x4_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_4_(vget_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v);
#else
simde_uint16x4_private v_ = simde_uint16x4_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_u16
#define vget_lane_u16(v, lane) simde_vget_lane_u16((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vget_lane_u32(simde_uint32x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_2_(vget_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v);
#else
simde_uint32x2_private v_ = simde_uint32x2_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_u32
#define vget_lane_u32(v, lane) simde_vget_lane_u32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vget_lane_u64(simde_uint64x1_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
uint64_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
(void) lane;
return vget_lane_u64(v, 0);
#else
simde_uint64x1_private v_ = simde_uint64x1_to_private(v);
r = v_.values[lane];
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_lane_u64
#define vget_lane_u64(v, lane) simde_vget_lane_u64((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vgetq_lane_f32(simde_float32x4_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_4_(vgetq_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v);
#else
simde_float32x4_private v_ = simde_float32x4_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
SIMDE_CONSTIFY_4_(wasm_f32x4_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v_.v128);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_f32
#define vgetq_lane_f32(v, lane) simde_vgetq_lane_f32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vgetq_lane_f64(simde_float64x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
SIMDE_CONSTIFY_2_(vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v);
#else
simde_float64x2_private v_ = simde_float64x2_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
SIMDE_CONSTIFY_2_(wasm_f64x2_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v_.v128);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_f64
#define vgetq_lane_f64(v, lane) simde_vgetq_lane_f64((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vgetq_lane_s8(simde_int8x16_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_16_(vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v);
#else
simde_int8x16_private v_ = simde_int8x16_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int r_;
SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(int8_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_s8
#define vgetq_lane_s8(v, lane) simde_vgetq_lane_s8((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vgetq_lane_s16(simde_int16x8_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_8_(vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v);
#else
simde_int16x8_private v_ = simde_int16x8_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int r_;
SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(int16_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_s16
#define vgetq_lane_s16(v, lane) simde_vgetq_lane_s16((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vgetq_lane_s32(simde_int32x4_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_4_(vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v);
#else
simde_int32x4_private v_ = simde_int32x4_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int r_;
SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(int32_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_s32
#define vgetq_lane_s32(v, lane) simde_vgetq_lane_s32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vgetq_lane_s64(simde_int64x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
int64_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_2_(vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v);
#else
simde_int64x2_private v_ = simde_int64x2_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int64_t r_;
SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(int64_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_s64
#define vgetq_lane_s64(v, lane) simde_vgetq_lane_s64((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vgetq_lane_u8(simde_uint8x16_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_16_(vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v);
#else
simde_uint8x16_private v_ = simde_uint8x16_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int r_;
SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(uint8_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_u8
#define vgetq_lane_u8(v, lane) simde_vgetq_lane_u8((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vgetq_lane_u16(simde_uint16x8_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_8_(vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v);
#else
simde_uint16x8_private v_ = simde_uint16x8_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int r_;
SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(uint16_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_u16
#define vgetq_lane_u16(v, lane) simde_vgetq_lane_u16((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vgetq_lane_u32(simde_uint32x4_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_4_(vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v);
#else
simde_uint32x4_private v_ = simde_uint32x4_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int32_t r_;
SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(uint32_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_u32
#define vgetq_lane_u32(v, lane) simde_vgetq_lane_u32((v), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vgetq_lane_u64(simde_uint64x2_t v, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
uint64_t r;
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_CONSTIFY_2_(vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v);
#else
simde_uint64x2_private v_ = simde_uint64x2_to_private(v);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
int64_t r_;
SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v_.v128);
r = HEDLEY_STATIC_CAST(uint64_t, r_);
#else
r = v_.values[lane];
#endif
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vgetq_lane_u64
#define vgetq_lane_u64(v, lane) simde_vgetq_lane_u64((v), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_GET_LANE_H) */
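/* Usage sketch (hypothetical helper, not part of simde): extract one
 * scalar lane. The lane argument must be a constant expression, which
 * is why the native and WASM paths above dispatch through the
 * SIMDE_CONSTIFY_* macros instead of indexing directly. */
static simde_float32_t
example_third_lane(simde_float32x4_t v) {
  return simde_vgetq_lane_f32(v, 2); /* valid lane range is 0..3 */
}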

View File

@@ -0,0 +1,332 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_GET_LOW_H)
#define SIMDE_ARM_NEON_GET_LOW_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vget_low_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_f32(a);
#else
simde_float32x2_private r_;
simde_float32x4_private a_ = simde_float32x4_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_f32
#define vget_low_f32(a) simde_vget_low_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vget_low_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vget_low_f64(a);
#else
simde_float64x1_private r_;
simde_float64x2_private a_ = simde_float64x2_to_private(a);
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vget_low_f64
#define vget_low_f64(a) simde_vget_low_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vget_low_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_s8(a);
#else
simde_int8x8_private r_;
simde_int8x16_private a_ = simde_int8x16_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_s8
#define vget_low_s8(a) simde_vget_low_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vget_low_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_s16(a);
#else
simde_int16x4_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_s16
#define vget_low_s16(a) simde_vget_low_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vget_low_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_s32(a);
#else
simde_int32x2_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_s32
#define vget_low_s32(a) simde_vget_low_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vget_low_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_s64(a);
#else
simde_int64x1_private r_;
simde_int64x2_private a_ = simde_int64x2_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_s64
#define vget_low_s64(a) simde_vget_low_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vget_low_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_u8(a);
#else
simde_uint8x8_private r_;
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_u8
#define vget_low_u8(a) simde_vget_low_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vget_low_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_u16(a);
#else
simde_uint16x4_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_u16
#define vget_low_u16(a) simde_vget_low_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vget_low_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_u32(a);
#else
simde_uint32x2_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_u32
#define vget_low_u32(a) simde_vget_low_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vget_low_u64(simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vget_low_u64(a);
#else
simde_uint64x1_private r_;
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_movepi64_pi64(a_.m128i);
#else
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i];
}
#endif
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vget_low_u64
#define vget_low_u64(a) simde_vget_low_u64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_GET_LOW_H) */
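/* Usage sketch (hypothetical helper, not part of simde): the
 * complement of vget_high_*. On x86 builds with SSE2 and MMX this maps
 * to a single _mm_movepi64_pi64; elsewhere it falls back to the
 * shuffle or copy paths shown above. */
static simde_uint16x4_t
example_lower_half(simde_uint16x8_t v) {
  return simde_vget_low_u16(v); /* lanes 0..3 */
}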

View File

@@ -0,0 +1,310 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
/* TODO: the 128-bit versions only require AVX-512 because of the final
* conversions from larger types down to smaller ones. We could get
* the same results from AVX/AVX2 instructions with some shuffling
* to extract the low half of each input element to the low half
* of a 256-bit vector, then cast that to a 128-bit vector. */
#if !defined(SIMDE_ARM_NEON_HADD_H)
#define SIMDE_ARM_NEON_HADD_H
#include "addl.h"
#include "shr_n.h"
#include "movn.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vhadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_s8(a, b);
#else
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddl_s8(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_s8
#define vhadd_s8(a, b) simde_vhadd_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vhadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_s16(a, b);
#else
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddl_s16(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_s16
#define vhadd_s16(a, b) simde_vhadd_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vhadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_s32(a, b);
#else
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddl_s32(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_s32
#define vhadd_s32(a, b) simde_vhadd_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_u8(a, b);
#else
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddl_u8(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_u8
#define vhadd_u8(a, b) simde_vhadd_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_u16(a, b);
#else
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddl_u16(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_u16
#define vhadd_u16(a, b) simde_vhadd_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhadd_u32(a, b);
#else
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddl_u32(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhadd_u32
#define vhadd_u32(a, b) simde_vhadd_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_s8(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_add_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) + HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_s8
#define vhaddq_s8(a, b) simde_vhaddq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_s16(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_add_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) + HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_s16
#define vhaddq_s16(a, b) simde_vhaddq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_s32(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_add_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) + HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_s32
#define vhaddq_s32(a, b) simde_vhaddq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_u8(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_add_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
v128_t lo =
wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_low_u8x16(a_.v128),
wasm_u16x8_extend_low_u8x16(b_.v128)),
1);
v128_t hi =
wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_high_u8x16(a_.v128),
wasm_u16x8_extend_high_u8x16(b_.v128)),
1);
r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
22, 24, 26, 28, 30);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_u8
#define vhaddq_u8(a, b) simde_vhaddq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_u16(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_add_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_u16
#define vhaddq_u16(a, b) simde_vhaddq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhaddq_u32(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_add_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhaddq_u32
#define vhaddq_u32(a, b) simde_vhaddq_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_HADD_H) */
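/* Usage sketch (hypothetical helper, not part of simde): vhadd
 * computes (a + b) >> 1 in a widened intermediate type, so the sum
 * cannot wrap; for uint8_t inputs 200 and 100 the result is 150 even
 * though 300 exceeds UINT8_MAX. */
static simde_uint8x16_t
example_average_floor(simde_uint8x16_t a, simde_uint8x16_t b) {
  return simde_vhaddq_u8(a, b);
}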

View File

@@ -0,0 +1,310 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
/* TODO: the 128-bit versions only require AVX-512 because of the final
* conversions from larger types down to smaller ones. We could get
* the same results from AVX/AVX2 instructions with some shuffling
* to extract the low half of each input element to the low half
* of a 256-bit vector, then cast that to a 128-bit vector. */
#if !defined(SIMDE_ARM_NEON_HSUB_H)
#define SIMDE_ARM_NEON_HSUB_H
#include "subl.h"
#include "shr_n.h"
#include "movn.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vhsub_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_s8(a, b);
#else
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubl_s8(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_s8
#define vhsub_s8(a, b) simde_vhsub_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vhsub_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_s16(a, b);
#else
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubl_s16(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_s16
#define vhsub_s16(a, b) simde_vhsub_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vhsub_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_s32(a, b);
#else
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubl_s32(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_s32
#define vhsub_s32(a, b) simde_vhsub_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vhsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_u8(a, b);
#else
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubl_u8(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_u8
#define vhsub_u8(a, b) simde_vhsub_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vhsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_u16(a, b);
#else
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubl_u16(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_u16
#define vhsub_u16(a, b) simde_vhsub_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vhsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsub_u32(a, b);
#else
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubl_u32(a, b), 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsub_u32
#define vhsub_u32(a, b) simde_vhsub_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vhsubq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_s8(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_sub_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_s8
#define vhsubq_s8(a, b) simde_vhsubq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vhsubq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_s16(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_sub_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_s16
#define vhsubq_s16(a, b) simde_vhsubq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vhsubq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_s32(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_sub_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_s32
#define vhsubq_s32(a, b) simde_vhsubq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vhsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_u8(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_sub_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
v128_t lo =
wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_low_u8x16(a_.v128),
wasm_u16x8_extend_low_u8x16(b_.v128)),
1);
v128_t hi =
wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_high_u8x16(a_.v128),
wasm_u16x8_extend_high_u8x16(b_.v128)),
1);
r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
22, 24, 26, 28, 30);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_u8
#define vhsubq_u8(a, b) simde_vhsubq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vhsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_u16(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_sub_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_u16
#define vhsubq_u16(a, b) simde_vhsubq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vhsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vhsubq_u32(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_sub_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vhsubq_u32
#define vhsubq_u32(a, b) simde_vhsubq_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_HSUB_H) */
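/* Usage sketch (hypothetical helper, not part of simde): the
 * subtracting counterpart, (a - b) >> 1 with a widened intermediate;
 * for int8_t inputs -100 and 100 this yields -100 instead of wrapping
 * in 8-bit arithmetic. */
static simde_int8x16_t
example_half_difference(simde_int8x16_t a, simde_int8x16_t b) {
  return simde_vhsubq_s8(a, b);
}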

View File

@@ -0,0 +1,512 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_LD1_H)
#define SIMDE_ARM_NEON_LD1_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vld1_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vld1_f16(ptr);
#else
simde_float16x4_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_float16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_f16
#define vld1_f16(a) simde_vld1_f16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vld1_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_f32(ptr);
#else
simde_float32x2_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_f32
#define vld1_f32(a) simde_vld1_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vld1_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(1)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld1_f64(ptr);
#else
simde_float64x1_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1_f64
#define vld1_f64(a) simde_vld1_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vld1_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_s8(ptr);
#else
simde_int8x8_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_s8
#define vld1_s8(a) simde_vld1_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vld1_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_s16(ptr);
#else
simde_int16x4_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_s16
#define vld1_s16(a) simde_vld1_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vld1_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_s32(ptr);
#else
simde_int32x2_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_s32
#define vld1_s32(a) simde_vld1_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vld1_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_s64(ptr);
#else
simde_int64x1_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_s64
#define vld1_s64(a) simde_vld1_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vld1_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_u8(ptr);
#else
simde_uint8x8_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_u8
#define vld1_u8(a) simde_vld1_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vld1_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_u16(ptr);
#else
simde_uint16x4_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_u16
#define vld1_u16(a) simde_vld1_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vld1_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_u32(ptr);
#else
simde_uint32x2_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_u32
#define vld1_u32(a) simde_vld1_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vld1_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_u64(ptr);
#else
simde_uint64x1_private r_;
simde_memcpy(&r_, ptr, sizeof(r_));
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_u64
#define vld1_u64(a) simde_vld1_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vld1q_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vld1q_f16(ptr);
#else
simde_float16x8_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_float16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_f16
#define vld1q_f16(a) simde_vld1q_f16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vld1q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_f32(ptr);
#else
simde_float32x4_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_f32
#define vld1q_f32(a) simde_vld1q_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vld1q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld1q_f64(ptr);
#else
simde_float64x2_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1q_f64
#define vld1q_f64(a) simde_vld1q_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vld1q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_s8(ptr);
#else
simde_int8x16_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_s8
#define vld1q_s8(a) simde_vld1q_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vld1q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_s16(ptr);
#else
simde_int16x8_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_s16
#define vld1q_s16(a) simde_vld1q_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vld1q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_s32(ptr);
#else
simde_int32x4_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_s32
#define vld1q_s32(a) simde_vld1q_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vld1q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_s64(ptr);
#else
simde_int64x2_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_s64
#define vld1q_s64(a) simde_vld1q_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vld1q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_u8(ptr);
#else
simde_uint8x16_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u8
#define vld1q_u8(a) simde_vld1q_u8((a))
#endif
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x2_t
simde_vld1q_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
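  /* The native vld1q_u8_x2 intrinsic is only usable with GCC 8+ (and then
     only on AArch64) or clang 7+; older compilers take the scalar
     element-by-element fallback below. */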
#if \
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vld1q_u8_x2(ptr);
#else
simde_uint8x16_private a_[2];
for (size_t i = 0; i < 32; i++) {
a_[i / 16].values[i % 16] = ptr[i];
}
simde_uint8x16x2_t s_ = { { simde_uint8x16_from_private(a_[0]),
simde_uint8x16_from_private(a_[1]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u8_x2
#define vld1q_u8_x2(a) simde_vld1q_u8_x2((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x3_t
simde_vld1q_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) {
#if \
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vld1q_u8_x3(ptr);
#else
simde_uint8x16_private a_[3];
for (size_t i = 0; i < 48; i++) {
a_[i / 16].values[i % 16] = ptr[i];
}
simde_uint8x16x3_t s_ = { { simde_uint8x16_from_private(a_[0]),
simde_uint8x16_from_private(a_[1]),
simde_uint8x16_from_private(a_[2]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u8_x3
#define vld1q_u8_x3(a) simde_vld1q_u8_x3((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x4_t
simde_vld1q_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
#if \
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vld1q_u8_x4(ptr);
#else
simde_uint8x16_private a_[4];
for (size_t i = 0; i < 64; i++) {
a_[i / 16].values[i % 16] = ptr[i];
}
simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]),
simde_uint8x16_from_private(a_[1]),
simde_uint8x16_from_private(a_[2]),
simde_uint8x16_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u8_x4
#define vld1q_u8_x4(a) simde_vld1q_u8_x4((a))
#endif
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vld1q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_u16(ptr);
#else
simde_uint16x8_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u16
#define vld1q_u16(a) simde_vld1q_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vld1q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_u32(ptr);
#else
simde_uint32x4_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u32
#define vld1q_u32(a) simde_vld1q_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vld1q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_u64(ptr);
#else
simde_uint64x2_private r_;
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_load(ptr);
#else
simde_memcpy(&r_, ptr, sizeof(r_));
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_u64
#define vld1q_u64(a) simde_vld1q_u64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD1_H) */
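
A short usage sketch for the loads above (illustrative, not part of the upstream header; assumes <simde/arm/neon.h> is on the include path). vld1/vld1q load contiguous elements into one vector; the portable path is a plain memcpy, so unaligned pointers are safe:

#include <simde/arm/neon.h>
#include <stdint.h>

void ld1_demo(void) {
    int32_t buf[4] = {1, 2, 3, 4};
    /* Loads four contiguous int32 values; on plain C targets this is
       just a memcpy into the private representation. */
    simde_int32x4_t v = simde_vld1q_s32(buf);

    uint8_t bytes[32];
    for (int i = 0; i < 32; i++) bytes[i] = (uint8_t)i;
    /* The _x2 variant loads two consecutive 16-byte vectors (not
       available on ICC builds hit by SIMDE_BUG_INTEL_857088). */
    simde_uint8x16x2_t pair = simde_vld1q_u8_x2(bytes);
    (void)v; (void)pair;
}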

@@ -0,0 +1,407 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_LD1_DUP_H)
#define SIMDE_ARM_NEON_LD1_DUP_H
#include "dup_n.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vld1_dup_f32(simde_float32 const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_f32(ptr);
#else
return simde_vdup_n_f32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_f32
#define vld1_dup_f32(a) simde_vld1_dup_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vld1_dup_f64(simde_float64 const * ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld1_dup_f64(ptr);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return simde_vreinterpret_f64_s64(vld1_dup_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, ptr)));
#else
return simde_vdup_n_f64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_f64
#define vld1_dup_f64(a) simde_vld1_dup_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vld1_dup_s8(int8_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_s8(ptr);
#else
return simde_vdup_n_s8(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_s8
#define vld1_dup_s8(a) simde_vld1_dup_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vld1_dup_s16(int16_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_s16(ptr);
#else
return simde_vdup_n_s16(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_s16
#define vld1_dup_s16(a) simde_vld1_dup_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vld1_dup_s32(int32_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_s32(ptr);
#else
return simde_vdup_n_s32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_s32
#define vld1_dup_s32(a) simde_vld1_dup_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vld1_dup_s64(int64_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_s64(ptr);
#else
return simde_vdup_n_s64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_s64
#define vld1_dup_s64(a) simde_vld1_dup_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vld1_dup_u8(uint8_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_u8(ptr);
#else
return simde_vdup_n_u8(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_u8
#define vld1_dup_u8(a) simde_vld1_dup_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vld1_dup_u16(uint16_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_u16(ptr);
#else
return simde_vdup_n_u16(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_u16
#define vld1_dup_u16(a) simde_vld1_dup_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vld1_dup_u32(uint32_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_u32(ptr);
#else
return simde_vdup_n_u32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_u32
#define vld1_dup_u32(a) simde_vld1_dup_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vld1_dup_u64(uint64_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1_dup_u64(ptr);
#else
return simde_vdup_n_u64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_dup_u64
#define vld1_dup_u64(a) simde_vld1_dup_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vld1q_dup_f32(simde_float32 const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_f32(ptr);
#elif \
defined(SIMDE_X86_SSE_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_float32x4_private r_;
#if defined(SIMDE_X86_SSE_NATIVE)
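      /* _mm_load_ps1 loads a single float and broadcasts it to all four
         lanes of the SSE register. */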
r_.m128 = _mm_load_ps1(ptr);
#else
r_.v128 = wasm_v128_load32_splat(ptr);
#endif
return simde_float32x4_from_private(r_);
#else
return simde_vdupq_n_f32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_f32
#define vld1q_dup_f32(a) simde_vld1q_dup_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vld1q_dup_f64(simde_float64 const * ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld1q_dup_f64(ptr);
#else
return simde_vdupq_n_f64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_f64
#define vld1q_dup_f64(a) simde_vld1q_dup_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vld1q_dup_s8(int8_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_s8(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int8x16_private r_;
r_.v128 = wasm_v128_load8_splat(ptr);
return simde_int8x16_from_private(r_);
#else
return simde_vdupq_n_s8(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_s8
#define vld1q_dup_s8(a) simde_vld1q_dup_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vld1q_dup_s16(int16_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_s16(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int16x8_private r_;
r_.v128 = wasm_v128_load16_splat(ptr);
return simde_int16x8_from_private(r_);
#else
return simde_vdupq_n_s16(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_s16
#define vld1q_dup_s16(a) simde_vld1q_dup_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vld1q_dup_s32(int32_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_s32(ptr);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr)));
#else
r_.v128 = wasm_v128_load32_splat(ptr);
#endif
return simde_int32x4_from_private(r_);
#else
return simde_vdupq_n_s32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_s32
#define vld1q_dup_s32(a) simde_vld1q_dup_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vld1q_dup_s64(int64_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_s64(ptr);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi64x(*ptr);
#else
r_.v128 = wasm_v128_load64_splat(ptr);
#endif
return simde_int64x2_from_private(r_);
#else
return simde_vdupq_n_s64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_s64
#define vld1q_dup_s64(a) simde_vld1q_dup_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vld1q_dup_u8(uint8_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_u8(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint8x16_private r_;
r_.v128 = wasm_v128_load8_splat(ptr);
return simde_uint8x16_from_private(r_);
#else
return simde_vdupq_n_u8(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_u8
#define vld1q_dup_u8(a) simde_vld1q_dup_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vld1q_dup_u16(uint16_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_u16(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint16x8_private r_;
r_.v128 = wasm_v128_load16_splat(ptr);
return simde_uint16x8_from_private(r_);
#else
return simde_vdupq_n_u16(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_u16
#define vld1q_dup_u16(a) simde_vld1q_dup_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vld1q_dup_u32(uint32_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_u32(ptr);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint32x4_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr)));
#else
r_.v128 = wasm_v128_load32_splat(ptr);
#endif
return simde_uint32x4_from_private(r_);
#else
return simde_vdupq_n_u32(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_u32
#define vld1q_dup_u32(a) simde_vld1q_dup_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vld1q_dup_u64(uint64_t const * ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld1q_dup_u64(ptr);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint64x2_private r_;
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_set1_epi64x(*HEDLEY_REINTERPRET_CAST(int64_t const *, ptr));
#else
r_.v128 = wasm_v128_load64_splat(ptr);
#endif
return simde_uint64x2_from_private(r_);
#else
return simde_vdupq_n_u64(*ptr);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_dup_u64
#define vld1q_dup_u64(a) simde_vld1q_dup_u64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD1_DUP_H) */
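
A usage sketch for the duplicating loads above (illustrative, not part of the upstream header). vld1q_dup broadcasts the pointed-to scalar to every lane, which is equivalent to simde_vdupq_n_*(*ptr); SSE and WASM builds get dedicated splat loads as shown in the file:

#include <simde/arm/neon.h>

void ld1_dup_demo(void) {
    float x = 1.5f;
    /* All four lanes become 1.5f; SSE maps this to _mm_load_ps1, WASM
       to wasm_v128_load32_splat, and the fallback to vdupq_n_f32. */
    simde_float32x4_t v = simde_vld1q_dup_f32(&x);
    (void)v;
}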

@@ -0,0 +1,359 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_LD1_LANE_H)
#define SIMDE_ARM_NEON_LD1_LANE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t simde_vld1_lane_s8(int8_t const *ptr, simde_int8x8_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
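  /* `lane` must be an integer constant expression in [0, 7]; the portable
     path simply overwrites that element of a private copy of src. */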
simde_int8x8_private r = simde_int8x8_to_private(src);
r.values[lane] = *ptr;
return simde_int8x8_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_s8(ptr, src, lane) vld1_lane_s8(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_s8
#define vld1_lane_s8(ptr, src, lane) simde_vld1_lane_s8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t simde_vld1_lane_s16(int16_t const *ptr, simde_int16x4_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int16x4_private r = simde_int16x4_to_private(src);
r.values[lane] = *ptr;
return simde_int16x4_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_s16(ptr, src, lane) vld1_lane_s16(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_s16
#define vld1_lane_s16(ptr, src, lane) simde_vld1_lane_s16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t simde_vld1_lane_s32(int32_t const *ptr, simde_int32x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x2_private r = simde_int32x2_to_private(src);
r.values[lane] = *ptr;
return simde_int32x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_s32(ptr, src, lane) vld1_lane_s32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_s32
#define vld1_lane_s32(ptr, src, lane) simde_vld1_lane_s32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t simde_vld1_lane_s64(int64_t const *ptr, simde_int64x1_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_int64x1_private r = simde_int64x1_to_private(src);
r.values[lane] = *ptr;
return simde_int64x1_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_s64(ptr, src, lane) vld1_lane_s64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_s64
#define vld1_lane_s64(ptr, src, lane) simde_vld1_lane_s64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t simde_vld1_lane_u8(uint8_t const *ptr, simde_uint8x8_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint8x8_private r = simde_uint8x8_to_private(src);
r.values[lane] = *ptr;
return simde_uint8x8_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_u8(ptr, src, lane) vld1_lane_u8(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_u8
#define vld1_lane_u8(ptr, src, lane) simde_vld1_lane_u8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t simde_vld1_lane_u16(uint16_t const *ptr, simde_uint16x4_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint16x4_private r = simde_uint16x4_to_private(src);
r.values[lane] = *ptr;
return simde_uint16x4_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_u16(ptr, src, lane) vld1_lane_u16(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_u16
#define vld1_lane_u16(ptr, src, lane) simde_vld1_lane_u16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t simde_vld1_lane_u32(uint32_t const *ptr, simde_uint32x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x2_private r = simde_uint32x2_to_private(src);
r.values[lane] = *ptr;
return simde_uint32x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_u32(ptr, src, lane) vld1_lane_u32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_u32
#define vld1_lane_u32(ptr, src, lane) simde_vld1_lane_u32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t simde_vld1_lane_u64(uint64_t const *ptr, simde_uint64x1_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_uint64x1_private r = simde_uint64x1_to_private(src);
r.values[lane] = *ptr;
return simde_uint64x1_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_u64(ptr, src, lane) vld1_lane_u64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_u64
#define vld1_lane_u64(ptr, src, lane) simde_vld1_lane_u64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t simde_vld1_lane_f32(simde_float32_t const *ptr, simde_float32x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float32x2_private r = simde_float32x2_to_private(src);
r.values[lane] = *ptr;
return simde_float32x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1_lane_f32(ptr, src, lane) vld1_lane_f32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_f32
#define vld1_lane_f32(ptr, src, lane) simde_vld1_lane_f32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t simde_vld1_lane_f64(simde_float64_t const *ptr, simde_float64x1_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_float64x1_private r = simde_float64x1_to_private(src);
r.values[lane] = *ptr;
return simde_float64x1_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vld1_lane_f64(ptr, src, lane) vld1_lane_f64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1_lane_f64
#define vld1_lane_f64(ptr, src, lane) simde_vld1_lane_f64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t simde_vld1q_lane_s8(int8_t const *ptr, simde_int8x16_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
simde_int8x16_private r = simde_int8x16_to_private(src);
r.values[lane] = *ptr;
return simde_int8x16_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_s8(ptr, src, lane) vld1q_lane_s8(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_s8
#define vld1q_lane_s8(ptr, src, lane) simde_vld1q_lane_s8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t simde_vld1q_lane_s16(int16_t const *ptr, simde_int16x8_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_int16x8_private r = simde_int16x8_to_private(src);
r.values[lane] = *ptr;
return simde_int16x8_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_s16(ptr, src, lane) vld1q_lane_s16(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_s16
#define vld1q_lane_s16(ptr, src, lane) simde_vld1q_lane_s16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t simde_vld1q_lane_s32(int32_t const *ptr, simde_int32x4_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x4_private r = simde_int32x4_to_private(src);
r.values[lane] = *ptr;
return simde_int32x4_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_s32(ptr, src, lane) vld1q_lane_s32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_s32
#define vld1q_lane_s32(ptr, src, lane) simde_vld1q_lane_s32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t simde_vld1q_lane_s64(int64_t const *ptr, simde_int64x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int64x2_private r = simde_int64x2_to_private(src);
r.values[lane] = *ptr;
return simde_int64x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_s64(ptr, src, lane) vld1q_lane_s64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_s64
#define vld1q_lane_s64(ptr, src, lane) simde_vld1q_lane_s64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t simde_vld1q_lane_u8(uint8_t const *ptr, simde_uint8x16_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
simde_uint8x16_private r = simde_uint8x16_to_private(src);
r.values[lane] = *ptr;
return simde_uint8x16_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_u8(ptr, src, lane) vld1q_lane_u8(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_u8
#define vld1q_lane_u8(ptr, src, lane) simde_vld1q_lane_u8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t simde_vld1q_lane_u16(uint16_t const *ptr, simde_uint16x8_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint16x8_private r = simde_uint16x8_to_private(src);
r.values[lane] = *ptr;
return simde_uint16x8_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_u16(ptr, src, lane) vld1q_lane_u16(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_u16
#define vld1q_lane_u16(ptr, src, lane) simde_vld1q_lane_u16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t simde_vld1q_lane_u32(uint32_t const *ptr, simde_uint32x4_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x4_private r = simde_uint32x4_to_private(src);
r.values[lane] = *ptr;
return simde_uint32x4_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_u32(ptr, src, lane) vld1q_lane_u32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_u32
#define vld1q_lane_u32(ptr, src, lane) simde_vld1q_lane_u32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t simde_vld1q_lane_u64(uint64_t const *ptr, simde_uint64x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint64x2_private r = simde_uint64x2_to_private(src);
r.values[lane] = *ptr;
return simde_uint64x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_u64(ptr, src, lane) vld1q_lane_u64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_u64
#define vld1q_lane_u64(ptr, src, lane) simde_vld1q_lane_u64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t simde_vld1q_lane_f32(simde_float32_t const *ptr, simde_float32x4_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_float32x4_private r = simde_float32x4_to_private(src);
r.values[lane] = *ptr;
return simde_float32x4_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vld1q_lane_f32(ptr, src, lane) vld1q_lane_f32(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_f32
#define vld1q_lane_f32(ptr, src, lane) simde_vld1q_lane_f32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t simde_vld1q_lane_f64(simde_float64_t const *ptr, simde_float64x2_t src,
const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float64x2_private r = simde_float64x2_to_private(src);
r.values[lane] = *ptr;
return simde_float64x2_from_private(r);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vld1q_lane_f64(ptr, src, lane) vld1q_lane_f64(ptr, src, lane)
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld1q_lane_f64
#define vld1q_lane_f64(ptr, src, lane) simde_vld1q_lane_f64((ptr), (src), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD1_LANE_H) */
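
A usage sketch for the lane loads above (illustrative, not part of the upstream header). vld1q_lane replaces exactly one lane of an existing vector, and the lane index must be a compile-time constant, as enforced by SIMDE_REQUIRE_CONSTANT_RANGE:

#include <simde/arm/neon.h>
#include <stdint.h>

void ld1_lane_demo(void) {
    int32_t buf[4] = {0, 0, 0, 0};
    simde_int32x4_t v = simde_vld1q_s32(buf);
    int32_t x = 42;
    /* Replace lane 2 only; the index must be a constant in [0, 3]. */
    v = simde_vld1q_lane_s32(&x, v, 2);
    (void)v;
}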

@@ -0,0 +1,713 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_LD2_H)
#define SIMDE_ARM_NEON_LD2_H
#include "get_low.h"
#include "get_high.h"
#include "ld1.h"
#include "uzp.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif
SIMDE_BEGIN_DECLS_
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8x2_t
simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s8(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
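    /* Single 128-bit load, then one byte shuffle that gathers the
       even-indexed bytes into the low half and the odd-indexed bytes into
       the high half; the two halves become the two result vectors. */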
v128_t a = wasm_v128_load(ptr);
simde_int8x16_private q_;
q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
simde_int8x16_t q = simde_int8x16_from_private(q_);
simde_int8x8x2_t u = {
simde_vget_low_s8(q),
simde_vget_high_s8(q)
};
return u;
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_int8x16_private a_ = simde_int8x16_to_private(simde_vld1q_s8(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
simde_int8x8x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
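    /* Portable deinterleave: element j of output vector i comes from
       ptr[i + 2*j], so even-indexed source elements land in r_[0] and
       odd-indexed ones in r_[1]. */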
simde_int8x8_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int8x8x2_t r = { {
simde_int8x8_from_private(r_[0]),
simde_int8x8_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_s8
#define vld2_s8(a) simde_vld2_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4x2_t
simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s16(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_int16x8_private a_ = simde_int16x8_to_private(simde_vld1q_s16(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7);
simde_int16x4x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_int16x4_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int16x4x2_t r = { {
simde_int16x4_from_private(r_[0]),
simde_int16x4_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_s16
#define vld2_s16(a) simde_vld2_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2x2_t
simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_int32x4_private a_ = simde_int32x4_to_private(simde_vld1q_s32(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
simde_int32x2x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_int32x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int32x2x2_t r = { {
simde_int32x2_from_private(r_[0]),
simde_int32x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_s32
#define vld2_s32(a) simde_vld2_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1x2_t
simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_s64(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_int64x2_private a_ = simde_int64x2_to_private(simde_vld1q_s64(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
simde_int64x1x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_int64x1_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int64x1x2_t r = { {
simde_int64x1_from_private(r_[0]),
simde_int64x1_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_s64
#define vld2_s64(a) simde_vld2_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8x2_t
simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u8(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
v128_t a = wasm_v128_load(ptr);
simde_uint8x16_private q_;
q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
simde_uint8x16_t q = simde_uint8x16_from_private(q_);
simde_uint8x8x2_t u = {
simde_vget_low_u8(q),
simde_vget_high_u8(q)
};
return u;
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vld1q_u8(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
simde_uint8x8x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_uint8x8_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint8x8x2_t r = { {
simde_uint8x8_from_private(r_[0]),
simde_uint8x8_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_u8
#define vld2_u8(a) simde_vld2_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4x2_t
simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u16(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vld1q_u16(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7);
simde_uint16x4x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_uint16x4_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint16x4x2_t r = { {
simde_uint16x4_from_private(r_[0]),
simde_uint16x4_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_u16
#define vld2_u16(a) simde_vld2_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2x2_t
simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vld1q_u32(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
simde_uint32x2x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_uint32x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint32x2x2_t r = { {
simde_uint32x2_from_private(r_[0]),
simde_uint32x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_u32
#define vld2_u32(a) simde_vld2_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1x2_t
simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_u64(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_vld1q_u64(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
simde_uint64x1x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_uint64x1_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint64x1x2_t r = { {
simde_uint64x1_from_private(r_[0]),
simde_uint64x1_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_u64
#define vld2_u64(a) simde_vld2_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2x2_t
simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2_f32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_float32x4_private a_ = simde_float32x4_to_private(simde_vld1q_f32(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
simde_float32x2x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_float32x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float32x2x2_t r = { {
simde_float32x2_from_private(r_[0]),
simde_float32x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2_f32
#define vld2_f32(a) simde_vld2_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1x2_t
simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2_f64(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
simde_float64x2_private a_ = simde_float64x2_to_private(simde_vld1q_f64(ptr));
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
simde_float64x1x2_t r;
simde_memcpy(&r, &a_, sizeof(r));
return r;
#else
simde_float64x1_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float64x1x2_t r = { {
simde_float64x1_from_private(r_[0]),
simde_float64x1_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld2_f64
#define vld2_f64(a) simde_vld2_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16x2_t
simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s8(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_s8(
simde_vld1q_s8(&(ptr[0])),
simde_vld1q_s8(&(ptr[16]))
);
#else
simde_int8x16_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int8x16x2_t r = { {
simde_int8x16_from_private(r_[0]),
simde_int8x16_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_s8
#define vld2q_s8(a) simde_vld2q_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4x2_t
simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_s32(
simde_vld1q_s32(&(ptr[0])),
simde_vld1q_s32(&(ptr[4]))
);
#else
simde_int32x4_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int32x4x2_t r = { {
simde_int32x4_from_private(r_[0]),
simde_int32x4_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_s32
#define vld2q_s32(a) simde_vld2q_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8x2_t
simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_s16(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_s16(
simde_vld1q_s16(&(ptr[0])),
simde_vld1q_s16(&(ptr[8]))
);
#else
simde_int16x8_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int16x8x2_t r = { {
simde_int16x8_from_private(r_[0]),
simde_int16x8_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_s16
#define vld2q_s16(a) simde_vld2q_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2x2_t
simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_s64(ptr);
#else
simde_int64x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int64x2x2_t r = { {
simde_int64x2_from_private(r_[0]),
simde_int64x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld2q_s64
#define vld2q_s64(a) simde_vld2q_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x2_t
simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u8(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_u8(
simde_vld1q_u8(&(ptr[ 0])),
simde_vld1q_u8(&(ptr[16]))
);
#else
simde_uint8x16_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint8x16x2_t r = { {
simde_uint8x16_from_private(r_[0]),
simde_uint8x16_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_u8
#define vld2q_u8(a) simde_vld2q_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8x2_t
simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u16(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_u16(
simde_vld1q_u16(&(ptr[0])),
simde_vld1q_u16(&(ptr[8]))
);
#else
simde_uint16x8_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint16x8x2_t r = { {
simde_uint16x8_from_private(r_[0]),
simde_uint16x8_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_u16
#define vld2q_u16(a) simde_vld2q_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4x2_t
simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_u32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_u32(
simde_vld1q_u32(&(ptr[0])),
simde_vld1q_u32(&(ptr[4]))
);
#else
simde_uint32x4_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint32x4x2_t r = { {
simde_uint32x4_from_private(r_[0]),
simde_uint32x4_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_u32
#define vld2q_u32(a) simde_vld2q_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2x2_t
simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_u64(ptr);
#else
simde_uint64x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint64x2x2_t r = { {
simde_uint64x2_from_private(r_[0]),
simde_uint64x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld2q_u64
#define vld2q_u64(a) simde_vld2q_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4x2_t
simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld2q_f32(ptr);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return
simde_vuzpq_f32(
simde_vld1q_f32(&(ptr[0])),
simde_vld1q_f32(&(ptr[4]))
);
#else
simde_float32x4_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float32x4x2_t r = { {
simde_float32x4_from_private(r_[0]),
simde_float32x4_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld2q_f32
#define vld2q_f32(a) simde_vld2q_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2x2_t
simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld2q_f64(ptr);
#else
simde_float64x2_private r_[2];
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float64x2x2_t r = { {
simde_float64x2_from_private(r_[0]),
simde_float64x2_from_private(r_[1]),
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld2q_f64
#define vld2q_f64(a) simde_vld2q_f64((a))
#endif
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD2_H) */
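
A usage sketch for the deinterleaving loads above (illustrative, not part of the upstream header). vld2/vld2q read 2*N contiguous elements and split them so even-indexed elements land in .val[0] and odd-indexed elements in .val[1], which is convenient for interleaved pair data such as (x, y) coordinates:

#include <simde/arm/neon.h>

void ld2_demo(void) {
    /* Interleaved (x, y) pairs: x0 y0 x1 y1 x2 y2 x3 y3 */
    simde_float32_t xy[8] = {0.f, 10.f, 1.f, 11.f, 2.f, 12.f, 3.f, 13.f};
    simde_float32x4x2_t p = simde_vld2q_f32(xy);
    /* p.val[0] = {0, 1, 2, 3} (x), p.val[1] = {10, 11, 12, 13} (y) */
    (void)p;
}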

@@ -0,0 +1,609 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_LD3_H)
#define SIMDE_ARM_NEON_LD3_H
#include "types.h"
#include "ld1.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif
SIMDE_BEGIN_DECLS_
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2x3_t
simde_vld3_f32(simde_float32 const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_f32(ptr);
#else
simde_float32x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float32x2x3_t r = { {
simde_float32x2_from_private(r_[0]),
simde_float32x2_from_private(r_[1]),
simde_float32x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_f32
#define vld3_f32(a) simde_vld3_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1x3_t
simde_vld3_f64(simde_float64 const *ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld3_f64(ptr);
#else
simde_float64x1_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float64x1x3_t r = { {
simde_float64x1_from_private(r_[0]),
simde_float64x1_from_private(r_[1]),
simde_float64x1_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3_f64
#define vld3_f64(a) simde_vld3_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8x3_t
simde_vld3_s8(int8_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_s8(ptr);
#else
simde_int8x8_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int8x8x3_t r = { {
simde_int8x8_from_private(r_[0]),
simde_int8x8_from_private(r_[1]),
simde_int8x8_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_s8
#define vld3_s8(a) simde_vld3_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4x3_t
simde_vld3_s16(int16_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_s16(ptr);
#else
simde_int16x4_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int16x4x3_t r = { {
simde_int16x4_from_private(r_[0]),
simde_int16x4_from_private(r_[1]),
simde_int16x4_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_s16
#define vld3_s16(a) simde_vld3_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2x3_t
simde_vld3_s32(int32_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_s32(ptr);
#else
simde_int32x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int32x2x3_t r = { {
simde_int32x2_from_private(r_[0]),
simde_int32x2_from_private(r_[1]),
simde_int32x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_s32
#define vld3_s32(a) simde_vld3_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1x3_t
simde_vld3_s64(int64_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_s64(ptr);
#else
simde_int64x1_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int64x1x3_t r = { {
simde_int64x1_from_private(r_[0]),
simde_int64x1_from_private(r_[1]),
simde_int64x1_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3_s64
#define vld3_s64(a) simde_vld3_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8x3_t
simde_vld3_u8(uint8_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_u8(ptr);
#else
simde_uint8x8_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint8x8x3_t r = { {
simde_uint8x8_from_private(r_[0]),
simde_uint8x8_from_private(r_[1]),
simde_uint8x8_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_u8
#define vld3_u8(a) simde_vld3_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4x3_t
simde_vld3_u16(uint16_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_u16(ptr);
#else
simde_uint16x4_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint16x4x3_t r = { {
simde_uint16x4_from_private(r_[0]),
simde_uint16x4_from_private(r_[1]),
simde_uint16x4_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_u16
#define vld3_u16(a) simde_vld3_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2x3_t
simde_vld3_u32(uint32_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_u32(ptr);
#else
simde_uint32x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint32x2x3_t r = { {
simde_uint32x2_from_private(r_[0]),
simde_uint32x2_from_private(r_[1]),
simde_uint32x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3_u32
#define vld3_u32(a) simde_vld3_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1x3_t
simde_vld3_u64(uint64_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3_u64(ptr);
#else
simde_uint64x1_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint64x1x3_t r = { {
simde_uint64x1_from_private(r_[0]),
simde_uint64x1_from_private(r_[1]),
simde_uint64x1_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3_u64
#define vld3_u64(a) simde_vld3_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4x3_t
simde_vld3q_f32(simde_float32 const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_f32(ptr);
#else
simde_float32x4_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float32x4x3_t r = { {
simde_float32x4_from_private(r_[0]),
simde_float32x4_from_private(r_[1]),
simde_float32x4_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_f32
#define vld3q_f32(a) simde_vld3q_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2x3_t
simde_vld3q_f64(simde_float64 const *ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld3q_f64(ptr);
#else
simde_float64x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_float64x2x3_t r = { {
simde_float64x2_from_private(r_[0]),
simde_float64x2_from_private(r_[1]),
simde_float64x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3q_f64
#define vld3q_f64(a) simde_vld3q_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16x3_t
simde_vld3q_s8(int8_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_s8(ptr);
#else
simde_int8x16_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int8x16x3_t r = { {
simde_int8x16_from_private(r_[0]),
simde_int8x16_from_private(r_[1]),
simde_int8x16_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_s8
#define vld3q_s8(a) simde_vld3q_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8x3_t
simde_vld3q_s16(int16_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_s16(ptr);
#else
simde_int16x8_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int16x8x3_t r = { {
simde_int16x8_from_private(r_[0]),
simde_int16x8_from_private(r_[1]),
simde_int16x8_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_s16
#define vld3q_s16(a) simde_vld3q_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4x3_t
simde_vld3q_s32(int32_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_s32(ptr);
#else
simde_int32x4_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int32x4x3_t r = { {
simde_int32x4_from_private(r_[0]),
simde_int32x4_from_private(r_[1]),
simde_int32x4_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_s32
#define vld3q_s32(a) simde_vld3q_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2x3_t
simde_vld3q_s64(int64_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld3q_s64(ptr);
#else
simde_int64x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_int64x2x3_t r = { {
simde_int64x2_from_private(r_[0]),
simde_int64x2_from_private(r_[1]),
simde_int64x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3q_s64
#define vld3q_s64(a) simde_vld3q_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x3_t
simde_vld3q_u8(uint8_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_u8(ptr);
#else
simde_uint8x16_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint8x16x3_t r = { {
simde_uint8x16_from_private(r_[0]),
simde_uint8x16_from_private(r_[1]),
simde_uint8x16_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_u8
#define vld3q_u8(a) simde_vld3q_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8x3_t
simde_vld3q_u16(uint16_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_u16(ptr);
#else
simde_uint16x8_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint16x8x3_t r = { {
simde_uint16x8_from_private(r_[0]),
simde_uint16x8_from_private(r_[1]),
simde_uint16x8_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_u16
#define vld3q_u16(a) simde_vld3q_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4x3_t
simde_vld3q_u32(uint32_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld3q_u32(ptr);
#else
simde_uint32x4_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint32x4x3_t r = { {
simde_uint32x4_from_private(r_[0]),
simde_uint32x4_from_private(r_[1]),
simde_uint32x4_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld3q_u32
#define vld3q_u32(a) simde_vld3q_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2x3_t
simde_vld3q_u64(uint64_t const *ptr) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld3q_u64(ptr);
#else
simde_uint64x2_private r_[3];
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
}
}
simde_uint64x2x3_t r = { {
simde_uint64x2_from_private(r_[0]),
simde_uint64x2_from_private(r_[1]),
simde_uint64x2_from_private(r_[2])
} };
return r;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld3q_u64
#define vld3q_u64(a) simde_vld3q_u64((a))
#endif
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD3_H) */

View File

@@ -0,0 +1,486 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com>
*/
#if !defined(SIMDE_ARM_NEON_LD4_H)
#define SIMDE_ARM_NEON_LD4_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif
SIMDE_BEGIN_DECLS_
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2x4_t
simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_f32(ptr);
#else
simde_float32x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]),
simde_float32x2_from_private(a_[2]), simde_float32x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_f32
#define vld4_f32(a) simde_vld4_f32((a))
#endif
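/* Added commentary (not upstream): unlike the ld2/ld3 fallbacks, this one
 * walks the 8 source floats once and routes element i to vector (i % 4),
 * lane (i / 4) -- the inverse of a 4-way interleave:
 *
 *   simde_float32 buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
 *   simde_float32x2x4_t v = simde_vld4_f32(buf);
 *   // v.val[0] = { 0, 4 }, v.val[1] = { 1, 5 },
 *   // v.val[2] = { 2, 6 }, v.val[3] = { 3, 7 }
 */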
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1x4_t
simde_vld4_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld4_f64(ptr);
#else
simde_float64x1_private a_[4];
for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]),
simde_float64x1_from_private(a_[2]), simde_float64x1_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_f64
#define vld4_f64(a) simde_vld4_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8x4_t
simde_vld4_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_s8(ptr);
#else
simde_int8x8_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]),
simde_int8x8_from_private(a_[2]), simde_int8x8_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_s8
#define vld4_s8(a) simde_vld4_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4x4_t
simde_vld4_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_s16(ptr);
#else
simde_int16x4_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]),
simde_int16x4_from_private(a_[2]), simde_int16x4_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_s16
#define vld4_s16(a) simde_vld4_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2x4_t
simde_vld4_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_s32(ptr);
#else
simde_int32x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]),
simde_int32x2_from_private(a_[2]), simde_int32x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_s32
#define vld4_s32(a) simde_vld4_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1x4_t
simde_vld4_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_s64(ptr);
#else
simde_int64x1_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]),
simde_int64x1_from_private(a_[2]), simde_int64x1_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_s64
#define vld4_s64(a) simde_vld4_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8x4_t
simde_vld4_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_u8(ptr);
#else
simde_uint8x8_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]),
simde_uint8x8_from_private(a_[2]), simde_uint8x8_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_u8
#define vld4_u8(a) simde_vld4_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4x4_t
simde_vld4_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_u16(ptr);
#else
simde_uint16x4_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]),
simde_uint16x4_from_private(a_[2]), simde_uint16x4_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_u16
#define vld4_u16(a) simde_vld4_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2x4_t
simde_vld4_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_u32(ptr);
#else
simde_uint32x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]),
simde_uint32x2_from_private(a_[2]), simde_uint32x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_u32
#define vld4_u32(a) simde_vld4_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1x4_t
simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4_u64(ptr);
#else
simde_uint64x1_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]),
simde_uint64x1_from_private(a_[2]), simde_uint64x1_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_u64
#define vld4_u64(a) simde_vld4_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4x4_t
simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_f32(ptr);
#else
simde_float32x4_private a_[4];
for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]),
simde_float32x4_from_private(a_[2]), simde_float32x4_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_f32
#define vld4q_f32(a) simde_vld4q_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2x4_t
simde_vld4q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld4q_f64(ptr);
#else
simde_float64x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]),
simde_float64x2_from_private(a_[2]), simde_float64x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_f64
#define vld4q_f64(a) simde_vld4q_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16x4_t
simde_vld4q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_s8(ptr);
#else
simde_int8x16_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), simde_int8x16_from_private(a_[1]),
simde_int8x16_from_private(a_[2]), simde_int8x16_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_s8
#define vld4q_s8(a) simde_vld4q_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8x4_t
simde_vld4q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_s16(ptr);
#else
simde_int16x8_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]),
simde_int16x8_from_private(a_[2]), simde_int16x8_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_s16
#define vld4q_s16(a) simde_vld4q_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4x4_t
simde_vld4q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_s32(ptr);
#else
simde_int32x4_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]),
simde_int32x4_from_private(a_[2]), simde_int32x4_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_s32
#define vld4q_s32(a) simde_vld4q_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2x4_t
simde_vld4q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld4q_s64(ptr);
#else
simde_int64x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), simde_int64x2_from_private(a_[1]),
simde_int64x2_from_private(a_[2]), simde_int64x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_s64
#define vld4q_s64(a) simde_vld4q_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x4_t
simde_vld4q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_u8(ptr);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
// Let a, b, c, d be the 4 uint8x16 vectors to return; they are laid out in memory as:
// [a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3,
// a4, b4, c4, d4, a5, b5, c5, d5, a6, b6, c6, d6, a7, b7, c7, d7,
// a8, b8, c8, d8, a9, b9, c9, d9, a10, b10, c10, d10, a11, b11, c11, d11,
// a12, b12, c12, d12, a13, b13, c13, d13, a14, b14, c14, d14, a15, b15, c15, d15]
v128_t a_ = wasm_v128_load(&ptr[0]);
v128_t b_ = wasm_v128_load(&ptr[16]);
v128_t c_ = wasm_v128_load(&ptr[32]);
v128_t d_ = wasm_v128_load(&ptr[48]);
v128_t a_low_b_low = wasm_i8x16_shuffle(a_, b_, 0, 4, 8, 12, 16, 20, 24, 28,
1, 5, 9, 13, 17, 21, 25, 29);
v128_t a_high_b_high = wasm_i8x16_shuffle(c_, d_, 0, 4, 8, 12, 16, 20, 24,
28, 1, 5, 9, 13, 17, 21, 25, 29);
v128_t a = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 0, 1, 2, 3, 4, 5,
6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
v128_t b = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 8, 9, 10, 11, 12,
13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
v128_t c_low_d_low = wasm_i8x16_shuffle(a_, b_, 2, 6, 10, 14, 18, 22, 26,
30, 3, 7, 11, 15, 19, 23, 27, 31);
v128_t c_high_d_high = wasm_i8x16_shuffle(c_, d_, 2, 6, 10, 14, 18, 22, 26,
30, 3, 7, 11, 15, 19, 23, 27, 31);
v128_t c = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 0, 1, 2, 3, 4, 5,
6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
v128_t d = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 8, 9, 10, 11, 12,
13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
simde_uint8x16_private r_[4];
r_[0].v128 = a;
r_[1].v128 = b;
r_[2].v128 = c;
r_[3].v128 = d;
simde_uint8x16x4_t s_ = {{simde_uint8x16_from_private(r_[0]),
simde_uint8x16_from_private(r_[1]),
simde_uint8x16_from_private(r_[2]),
simde_uint8x16_from_private(r_[3])}};
return s_;
#else
simde_uint8x16_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]),
simde_uint8x16_from_private(a_[2]), simde_uint8x16_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_u8
#define vld4q_u8(a) simde_vld4q_u8((a))
#endif
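/* Added commentary (not upstream): the WASM path above de-interleaves in
 * two shuffle layers -- the first layer gathers the a/b (and c/d) bytes
 * out of each 16-byte pair, the second stitches the matching halves back
 * together -- so six wasm_i8x16_shuffle calls replace the 64 scalar moves
 * of the portable loop. A quick scalar cross-check of the result:
 *
 *   uint8_t buf[64];
 *   for (size_t i = 0 ; i < 64 ; i++) buf[i] = (uint8_t) i;
 *   simde_uint8x16x4_t v = simde_vld4q_u8(buf);
 *   // v.val[k] holds { k, k+4, k+8, ..., k+60 } for k in 0..3
 */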
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8x4_t
simde_vld4q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_u16(ptr);
#else
simde_uint16x8_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]),
simde_uint16x8_from_private(a_[2]), simde_uint16x8_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_u16
#define vld4q_u16(a) simde_vld4q_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4x4_t
simde_vld4q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vld4q_u32(ptr);
#else
simde_uint32x4_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]),
simde_uint32x4_from_private(a_[2]), simde_uint32x4_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_u32
#define vld4q_u32(a) simde_vld4q_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2x4_t
simde_vld4q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vld4q_u64(ptr);
#else
simde_uint64x2_private a_[4];
for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 4 ; i++) {
a_[i % 4].values[i / 4] = ptr[i];
}
simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]),
simde_uint64x2_from_private(a_[2]), simde_uint64x2_from_private(a_[3]) } };
return s_;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_u64
#define vld4q_u64(a) simde_vld4q_u64((a))
#endif
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD4_H) */

View File

@@ -0,0 +1,593 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
* 2021 Evan Nemerson <evan@nemerson.com>
*/
/* In older versions of clang, __builtin_neon_vld4_lane_v would
* generate a diagnostic for most variants (those which didn't
* use signed 8-bit integers). I believe this was fixed by
* 78ad22e0cc6390fcd44b2b7b5132f1b960ff975d.
*
* Since we have to use macros (due to the immediate-mode parameter)
* we can't just disable it once in this file; we have to use statement
 * exprs and push / pop the diagnostic stack for each macro. */
#if !defined(SIMDE_ARM_NEON_LD4_LANE_H)
#define SIMDE_ARM_NEON_LD4_LANE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
#if !defined(SIMDE_BUG_INTEL_857088)
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8x4_t
simde_vld4_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x8x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_int8x8x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int8x8_private tmp_ = simde_int8x8_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int8x8_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_s8(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s8(ptr, src, lane))
#else
#define simde_vld4_lane_s8(ptr, src, lane) vld4_lane_s8(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_s8
#define vld4_lane_s8(ptr, src, lane) simde_vld4_lane_s8((ptr), (src), (lane))
#endif
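/* Added commentary (not upstream): the vld4_lane_* family loads one
 * element per vector rather than doing a full de-interleave -- ptr[i]
 * replaces lane `lane` of src.val[i] and every other lane passes through.
 * Sketch, with `src` assumed to be an existing simde_int8x8x4_t:
 *
 *   int8_t quad[4] = { 10, 11, 12, 13 };
 *   simde_int8x8x4_t out = simde_vld4_lane_s8(quad, src, 3);
 *   // out.val[i] == src.val[i] except lane 3, which becomes quad[i]
 */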
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4x4_t
simde_vld4_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int16x4x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int16x4_private tmp_ = simde_int16x4_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int16x4_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_s16(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s16(ptr, src, lane))
#else
#define simde_vld4_lane_s16(ptr, src, lane) vld4_lane_s16(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_s16
#define vld4_lane_s16(ptr, src, lane) simde_vld4_lane_s16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2x4_t
simde_vld4_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int32x2_private tmp_ = simde_int32x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int32x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_s32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s32(ptr, src, lane))
#else
#define simde_vld4_lane_s32(ptr, src, lane) vld4_lane_s32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_s32
#define vld4_lane_s32(ptr, src, lane) simde_vld4_lane_s32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1x4_t
simde_vld4_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_int64x1x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int64x1_private tmp_ = simde_int64x1_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int64x1_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_s64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s64(ptr, src, lane))
#else
#define simde_vld4_lane_s64(ptr, src, lane) vld4_lane_s64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_s64
#define vld4_lane_s64(ptr, src, lane) simde_vld4_lane_s64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8x4_t
simde_vld4_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x8x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint8x8x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint8x8_private tmp_ = simde_uint8x8_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint8x8_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_u8(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u8(ptr, src, lane))
#else
#define simde_vld4_lane_u8(ptr, src, lane) vld4_lane_u8(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_u8
#define vld4_lane_u8(ptr, src, lane) simde_vld4_lane_u8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4x4_t
simde_vld4_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint16x4x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint16x4_private tmp_ = simde_uint16x4_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint16x4_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_u16(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u16(ptr, src, lane))
#else
#define simde_vld4_lane_u16(ptr, src, lane) vld4_lane_u16(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_u16
#define vld4_lane_u16(ptr, src, lane) simde_vld4_lane_u16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2x4_t
simde_vld4_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint32x2_private tmp_ = simde_uint32x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint32x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_u32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u32(ptr, src, lane))
#else
#define simde_vld4_lane_u32(ptr, src, lane) vld4_lane_u32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_u32
#define vld4_lane_u32(ptr, src, lane) simde_vld4_lane_u32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1x4_t
simde_vld4_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_uint64x1x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint64x1_private tmp_ = simde_uint64x1_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint64x1_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_u64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u64(ptr, src, lane))
#else
#define simde_vld4_lane_u64(ptr, src, lane) vld4_lane_u64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_u64
#define vld4_lane_u64(ptr, src, lane) simde_vld4_lane_u64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2x4_t
simde_vld4_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float32x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_float32x2_private tmp_ = simde_float32x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_float32x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_f32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f32(ptr, src, lane))
#else
#define simde_vld4_lane_f32(ptr, src, lane) vld4_lane_f32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_f32
#define vld4_lane_f32(ptr, src, lane) simde_vld4_lane_f32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1x4_t
simde_vld4_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_float64x1x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_float64x1_private tmp_ = simde_float64x1_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_float64x1_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4_lane_f64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f64(ptr, src, lane))
#else
#define simde_vld4_lane_f64(ptr, src, lane) vld4_lane_f64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4_lane_f64
#define vld4_lane_f64(ptr, src, lane) simde_vld4_lane_f64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16x4_t
simde_vld4q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x16x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
simde_int8x16x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int8x16_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_s8(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s8(ptr, src, lane))
#else
#define simde_vld4q_lane_s8(ptr, src, lane) vld4q_lane_s8(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_s8
#define vld4q_lane_s8(ptr, src, lane) simde_vld4q_lane_s8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8x4_t
simde_vld4q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x8x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_int16x8x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int16x8_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_s16(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s16(ptr, src, lane))
#else
#define simde_vld4q_lane_s16(ptr, src, lane) vld4q_lane_s16(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_s16
#define vld4q_lane_s16(ptr, src, lane) simde_vld4q_lane_s16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4x4_t
simde_vld4q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x4x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int32x4_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_s32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s32(ptr, src, lane))
#else
#define simde_vld4q_lane_s32(ptr, src, lane) vld4q_lane_s32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_s32
#define vld4q_lane_s32(ptr, src, lane) simde_vld4q_lane_s32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2x4_t
simde_vld4q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int64x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_int64x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_s64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s64(ptr, src, lane))
#else
#define simde_vld4q_lane_s64(ptr, src, lane) vld4q_lane_s64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_s64
#define vld4q_lane_s64(ptr, src, lane) simde_vld4q_lane_s64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16x4_t
simde_vld4q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x16x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
simde_uint8x16x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint8x16_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_u8(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u8(ptr, src, lane))
#else
#define simde_vld4q_lane_u8(ptr, src, lane) vld4q_lane_u8(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_u8
#define vld4q_lane_u8(ptr, src, lane) simde_vld4q_lane_u8((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8x4_t
simde_vld4q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x8x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint16x8x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint16x8_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_u16(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u16(ptr, src, lane))
#else
#define simde_vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_u16
#define vld4q_lane_u16(ptr, src, lane) simde_vld4q_lane_u16((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4x4_t
simde_vld4q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x4x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint32x4_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_u32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u32(ptr, src, lane))
#else
#define simde_vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_u32
#define vld4q_lane_u32(ptr, src, lane) simde_vld4q_lane_u32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2x4_t
simde_vld4q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint64x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_uint64x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_u64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u64(ptr, src, lane))
#else
#define simde_vld4q_lane_u64(ptr, src, lane) vld4q_lane_u64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_u64
#define vld4q_lane_u64(ptr, src, lane) simde_vld4q_lane_u64((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4x4_t
simde_vld4q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_float32x4x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_float32x4_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_f32(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f32(ptr, src, lane))
#else
#define simde_vld4q_lane_f32(ptr, src, lane) vld4q_lane_f32(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_f32
#define vld4q_lane_f32(ptr, src, lane) simde_vld4q_lane_f32((ptr), (src), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2x4_t
simde_vld4q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x4_t src, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float64x2x4_t r;
for (size_t i = 0 ; i < 4 ; i++) {
simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]);
tmp_.values[lane] = ptr[i];
r.val[i] = simde_float64x2_from_private(tmp_);
}
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
#define simde_vld4q_lane_f64(ptr, src, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f64(ptr, src, lane))
#else
#define simde_vld4q_lane_f64(ptr, src, lane) vld4q_lane_f64(ptr, src, lane)
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vld4q_lane_f64
#define vld4q_lane_f64(ptr, src, lane) simde_vld4q_lane_f64((ptr), (src), (lane))
#endif
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_LD4_LANE_H) */

View File

@@ -0,0 +1,624 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MAX_H)
#define SIMDE_ARM_NEON_MAX_H
#include "types.h"
#include "cgt.h"
#include "bsl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_f32(a, b);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
#else
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_f32
#define vmax_f32(a, b) simde_vmax_f32((a), (b))
#endif
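/* Added commentary (not upstream): without SIMDE_FAST_NANS the ternary
 * above mirrors NEON vmax semantics for NaN -- if either lane is NaN,
 * neither a >= b nor a < b holds, so the lane collapses to
 * SIMDE_MATH_NANF; with SIMDE_FAST_NANS the cheaper straight comparison
 * is used and NaN handling is unspecified. */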
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmax_f64(a, b);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
#else
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmax_f64
#define vmax_f64(a, b) simde_vmax_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_s8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s8(simde_vcgt_s8(a, b), a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_s8
#define vmax_s8(a, b) simde_vmax_s8((a), (b))
#endif
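/* Added commentary (not upstream): the SIMDE_NATURAL_VECTOR_SIZE branch
 * above builds max() from two existing kernels -- vcgt yields all-ones in
 * lanes where a > b, and vbsl uses that mask to pick a there and b
 * elsewhere, i.e. bsl(cgt(a, b), a, b) == max(a, b) lane-wise. */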
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s16(simde_vcgt_s16(a, b), a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_s16
#define vmax_s16(a, b) simde_vmax_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_s32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s32(simde_vcgt_s32(a, b), a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_s32
#define vmax_s32(a, b) simde_vmax_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s64(simde_vcgt_s64(a, b), a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int64x1_from_private(r_);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_u8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u8(simde_vcgt_u8(a, b), a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_u8
#define vmax_u8(a, b) simde_vmax_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_u16(a, b);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE)
return simde_vbsl_u16(simde_vcgt_u16(a, b), a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
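/* Added note: _mm_subs_pu16 saturates (a - b) at zero, so the sum is
 * b + (a - b) = a when a > b and b + 0 = b otherwise -- an unsigned
 * 16-bit max without a native unsigned max instruction on plain MMX. */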
r_.m64 = _mm_add_pi16(b_.m64, _mm_subs_pu16(a_.m64, b_.m64));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_u16
#define vmax_u16(a, b) simde_vmax_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmax_u32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u32(simde_vcgt_u32(a, b), a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmax_u32
#define vmax_u32(a, b) simde_vmax_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u64(simde_vcgt_u64(a, b), a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint64x1_from_private(r_);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
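    /* Select a wherever a > b or a is NaN: vec_orc(x, y) == x | ~y, and
     * vec_cmpeq(a, a) is false only in NaN lanes, so its complement marks
     * them.  Lanes where b is NaN fall through to b, so NaN propagates. */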
return
vec_sel(
b,
a,
vec_orc(
vec_cmpgt(a, b),
vec_cmpeq(a, a)
)
);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
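    /* POWER6 has no vec_orc, so build the complement of the self-equality
     * mask with vec_nor(x, x) == ~x instead. */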
SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) cmpres = vec_cmpeq(a, a);
return
vec_sel(
b,
a,
vec_or(
vec_cmpgt(a, b),
vec_nor(cmpres, cmpres)
)
);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS)
r_.m128 = _mm_max_ps(a_.m128, b_.m128);
#elif defined(SIMDE_X86_SSE_NATIVE)
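      /* m is set where a is NaN or a > b; blending with it returns a in
       * those lanes and b (the larger value, or a NaN) everywhere else. */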
__m128 m = _mm_or_ps(_mm_cmpneq_ps(a_.m128, a_.m128), _mm_cmpgt_ps(a_.m128, b_.m128));
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128 = _mm_blendv_ps(b_.m128, a_.m128, m);
#else
r_.m128 =
_mm_or_ps(
_mm_and_ps(m, a_.m128),
_mm_andnot_ps(m, b_.m128)
);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_max(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
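          /* If neither ordered compare holds, at least one operand is
           * NaN, so the Arm semantics require a NaN result. */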
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
#else
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_f32
#define vmaxq_f32(a, b) simde_vmaxq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmaxq_f64(a, b);
#elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
return vec_max(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS)
r_.m128d = _mm_max_pd(a_.m128d, b_.m128d);
#elif defined(SIMDE_X86_SSE2_NATIVE)
__m128d m = _mm_or_pd(_mm_cmpneq_pd(a_.m128d, a_.m128d), _mm_cmpgt_pd(a_.m128d, b_.m128d));
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128d = _mm_blendv_pd(b_.m128d, a_.m128d, m);
#else
r_.m128d =
_mm_or_pd(
_mm_and_pd(m, a_.m128d),
_mm_andnot_pd(m, b_.m128d)
);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_max(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
#else
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxq_f64
#define vmaxq_f64(a, b) simde_vmaxq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_max_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
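      /* _mm_max_epi8 only arrived with SSE4.1; on plain SSE2, emulate the
       * signed byte max with a compare mask and an and/andnot blend. */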
__m128i m = _mm_cmpgt_epi8(a_.m128i, b_.m128i);
r_.m128i = _mm_or_si128(_mm_and_si128(m, a_.m128i), _mm_andnot_si128(m, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_max(a_.v128, b_.v128);
#endif
return simde_int8x16_from_private(r_);
#else
return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_s8
#define vmaxq_s8(a, b) simde_vmaxq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_max_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_max(a_.v128, b_.v128);
#endif
return simde_int16x8_from_private(r_);
#else
return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_s16
#define vmaxq_s16(a, b) simde_vmaxq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE4_1_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_max_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_max(a_.v128, b_.v128);
#endif
return simde_int32x4_from_private(r_);
#else
return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_s32
#define vmaxq_s32(a, b) simde_vmaxq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#else
return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_max_epu8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_max(a_.v128, b_.v128);
#endif
return simde_uint8x16_from_private(r_);
#else
return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_u8
#define vmaxq_u8(a, b) simde_vmaxq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE2_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_max_epu16(a_.m128i, b_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
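        /* b + sat_u(a - b) == max_u(a, b); avoids _mm_max_epu16, which
         * requires SSE4.1. */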
r_.m128i = _mm_add_epi16(b_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_max(a_.v128, b_.v128);
#endif
return simde_uint16x8_from_private(r_);
#else
return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_u16
#define vmaxq_u16(a, b) simde_vmaxq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmaxq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#elif \
defined(SIMDE_X86_SSE4_1_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_max_epu32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_max(a_.v128, b_.v128);
#endif
return simde_uint32x4_from_private(r_);
#else
return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxq_u32
#define vmaxq_u32(a, b) simde_vmaxq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_max(a, b);
#else
return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b);
#endif
}
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MAX_H) */

View File

@@ -0,0 +1,217 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MAXNM_H)
#define SIMDE_ARM_NEON_MAXNM_H
#include "types.h"
#include "cge.h"
#include "bsl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmaxnm_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
return vmaxnm_f32(a, b);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmaxf)
r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]);
#else
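          /* Fallback with fmaxf semantics: comparisons against NaN are
           * false, so when neither ordered test holds, return whichever
           * operand is not NaN (b only when a is the NaN). */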
if (a_.values[i] > b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] < b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxnm_f32
#define vmaxnm_f32(a, b) simde_vmaxnm_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmaxnm_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmaxnm_f64(a, b);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmax)
r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]);
#else
if (a_.values[i] > b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] < b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxnm_f64
#define vmaxnm_f64(a, b) simde_vmaxnm_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmaxnmq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
return vmaxnmq_f32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_max(a, b);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE)
#if !defined(SIMDE_FAST_NANS)
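        /* _mm_max_ps copies its second operand whenever a lane compares
         * unordered, so a-is-NaN lanes already produce b; only lanes
         * where b is NaN must be patched to take a instead. */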
__m128 r = _mm_max_ps(a_.m128, b_.m128);
__m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128);
r = _mm_andnot_ps(bnan, r);
r = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan));
r_.m128 = r;
#else
r_.m128 = _mm_max_ps(a_.m128, b_.m128);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
r_.v128 = wasm_f32x4_max(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmaxf)
r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]);
#else
if (a_.values[i] > b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] < b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmaxnmq_f32
#define vmaxnmq_f32(a, b) simde_vmaxnmq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmaxnmq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmaxnmq_f64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_max(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
#if !defined(SIMDE_FAST_NANS)
__m128d r = _mm_max_pd(a_.m128d, b_.m128d);
__m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d);
r = _mm_andnot_pd(bnan, r);
r = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan));
r_.m128d = r;
#else
r_.m128d = _mm_max_pd(a_.m128d, b_.m128d);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
r_.v128 = wasm_f64x2_max(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmax)
r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]);
#else
if (a_.values[i] > b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] < b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxnmq_f64
#define vmaxnmq_f64(a, b) simde_vmaxnmq_f64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MAXNM_H) */

View File

@@ -0,0 +1,400 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MAXV_H)
#define SIMDE_ARM_NEON_MAXV_H
#include "types.h"
#include <float.h>
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmaxv_f32(simde_float32x2_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_f32(a);
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
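    /* Scalar reduction from the identity element for max;
     * SIMDE_VECTORIZE_REDUCTION typically expands to an OpenMP SIMD
     * reduction hint when OpenMP support is enabled. */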
r = -SIMDE_MATH_INFINITYF;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_f32
#define vmaxv_f32(v) simde_vmaxv_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vmaxv_s8(simde_int8x8_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_s8(a);
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
r = INT8_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_s8
#define vmaxv_s8(v) simde_vmaxv_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vmaxv_s16(simde_int16x4_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_s16(a);
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
r = INT16_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_s16
#define vmaxv_s16(v) simde_vmaxv_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vmaxv_s32(simde_int32x2_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_s32(a);
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
r = INT32_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_s32
#define vmaxv_s32(v) simde_vmaxv_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vmaxv_u8(simde_uint8x8_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_u8(a);
#else
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_u8
#define vmaxv_u8(v) simde_vmaxv_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vmaxv_u16(simde_uint16x4_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_u16(a);
#else
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_u16
#define vmaxv_u16(v) simde_vmaxv_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vmaxv_u32(simde_uint32x2_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxv_u32(a);
#else
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxv_u32
#define vmaxv_u32(v) simde_vmaxv_u32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmaxvq_f32(simde_float32x4_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_f32(a);
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
r = -SIMDE_MATH_INFINITYF;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_f32
#define vmaxvq_f32(v) simde_vmaxvq_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vmaxvq_f64(simde_float64x2_t a) {
simde_float64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_f64(a);
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
r = -SIMDE_MATH_INFINITY;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_f64
#define vmaxvq_f64(v) simde_vmaxvq_f64(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vmaxvq_s8(simde_int8x16_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_s8(a);
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
r = INT8_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_s8
#define vmaxvq_s8(v) simde_vmaxvq_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vmaxvq_s16(simde_int16x8_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_s16(a);
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
r = INT16_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_s16
#define vmaxvq_s16(v) simde_vmaxvq_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vmaxvq_s32(simde_int32x4_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_s32(a);
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
r = INT32_MIN;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_s32
#define vmaxvq_s32(v) simde_vmaxvq_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vmaxvq_u8(simde_uint8x16_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_u8(a);
#else
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_u8
#define vmaxvq_u8(v) simde_vmaxvq_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vmaxvq_u16(simde_uint16x8_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_u16(a);
#else
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_u16
#define vmaxvq_u16(v) simde_vmaxvq_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vmaxvq_u32(simde_uint32x4_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vmaxvq_u32(a);
#else
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
r = 0;
SIMDE_VECTORIZE_REDUCTION(max:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] > r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmaxvq_u32
#define vmaxvq_u32(v) simde_vmaxvq_u32(v)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MAXV_H) */

View File

@@ -0,0 +1,681 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MIN_H)
#define SIMDE_ARM_NEON_MIN_H
#include "types.h"
#include "cgt.h"
#include "ceq.h"
#include "bsl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmin_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_f32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(64)
simde_float32x2_t r = simde_vbsl_f32(simde_vcgt_f32(b, a), a, b);
#if !defined(SIMDE_FAST_NANS)
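      /* vceq(x, x) is false only in NaN lanes: return a where a is NaN,
       * else b where b is NaN, else the blended minimum, matching vmin's
       * NaN propagation. */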
r = simde_vbsl_f32(simde_vceq_f32(a, a), simde_vbsl_f32(simde_vceq_f32(b, b), r, b), a);
#endif
return r;
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
if (simde_math_isnanf(a_.values[i])) {
r_.values[i] = a_.values[i];
} else if (simde_math_isnanf(b_.values[i])) {
r_.values[i] = b_.values[i];
} else {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#else
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_f32
#define vmin_f32(a, b) simde_vmin_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmin_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmin_f64(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(64)
simde_float64x1_t r = simde_vbsl_f64(simde_vcgt_f64(b, a), a, b);
#if !defined(SIMDE_FAST_NANS)
r = simde_vbsl_f64(simde_vceq_f64(a, a), simde_vbsl_f64(simde_vceq_f64(b, b), r, b), a);
#endif
return r;
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
if (simde_math_isnan(a_.values[i])) {
r_.values[i] = a_.values[i];
} else if (simde_math_isnan(b_.values[i])) {
r_.values[i] = b_.values[i];
} else {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#else
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmin_f64
#define vmin_f64(a, b) simde_vmin_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmin_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_s8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s8(simde_vcgt_s8(b, a), a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_s8
#define vmin_s8(a, b) simde_vmin_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmin_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s16(simde_vcgt_s16(b, a), a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
      #if defined(SIMDE_X86_MMX_NATIVE)
        /* MMX has no signed 16-bit minimum, so blend through a compare
         * mask: where b > a keep a, otherwise keep b. */
        __m64 m = _mm_cmpgt_pi16(b_.m64, a_.m64);
        r_.m64 = _mm_or_si64(_mm_and_si64(m, a_.m64), _mm_andnot_si64(m, b_.m64));
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
          r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
        }
      #endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_s16
#define vmin_s16(a, b) simde_vmin_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmin_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_s32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s32(simde_vcgt_s32(b, a), a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_s32
#define vmin_s32(a, b) simde_vmin_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_x_vmin_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_s64(simde_vcgt_s64(b, a), a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_int64x1_from_private(r_);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_u8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u8(simde_vcgt_u8(b, a), a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_u8
#define vmin_u8(a, b) simde_vmin_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_u16(a, b);
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE)
return simde_vbsl_u16(simde_vcgt_u16(b, a), a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
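        /* a - sat_u(a - b) == min_u(a, b): the saturating subtract is
         * max(a - b, 0), so the difference cancels exactly when a > b. */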
r_.m64 = _mm_sub_pi16(a_.m64, _mm_subs_pu16(a_.m64, b_.m64));
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_u16
#define vmin_u16(a, b) simde_vmin_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmin_u32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u32(simde_vcgt_u32(b, a), a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmin_u32
#define vmin_u32(a, b) simde_vmin_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_x_vmin_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if SIMDE_NATURAL_VECTOR_SIZE > 0
return simde_vbsl_u64(simde_vcgt_u64(b, a), a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint64x1_from_private(r_);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vminq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_f32(a, b);
#elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
return vec_min(a, b);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS)
r_.m128 = _mm_min_ps(a_.m128, b_.m128);
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
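      /* Keep the minimum in ordered lanes and force NaN in unordered
       * ones: _mm_cmpord_ps is true only where neither operand is NaN. */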
r_.m128 = _mm_blendv_ps(_mm_set1_ps(SIMDE_MATH_NANF), _mm_min_ps(a_.m128, b_.m128), _mm_cmpord_ps(a_.m128, b_.m128));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
if (simde_math_isnanf(a_.values[i])) {
r_.values[i] = a_.values[i];
} else if (simde_math_isnanf(b_.values[i])) {
r_.values[i] = b_.values[i];
} else {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#else
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_f32
#define vminq_f32(a, b) simde_vminq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vminq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vminq_f64(a, b);
#elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
return vec_min(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS)
r_.m128d = _mm_min_pd(a_.m128d, b_.m128d);
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128d = _mm_blendv_pd(_mm_set1_pd(SIMDE_MATH_NAN), _mm_min_pd(a_.m128d, b_.m128d), _mm_cmpord_pd(a_.m128d, b_.m128d));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if !defined(SIMDE_FAST_NANS)
if (simde_math_isnan(a_.values[i])) {
r_.values[i] = a_.values[i];
} else if (simde_math_isnan(b_.values[i])) {
r_.values[i] = b_.values[i];
} else {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#else
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
#endif
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminq_f64
#define vminq_f64(a, b) simde_vminq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vminq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_min_epi8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_s8
#define vminq_s8(a, b) simde_vminq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vminq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_min_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_s16
#define vminq_s16(a, b) simde_vminq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vminq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_min_epi32(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_s32
#define vminq_s32(a, b) simde_vminq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vminq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_min_epi64(a_.m128i, b_.m128i);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_min_epu8(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_u8
#define vminq_u8(a, b) simde_vminq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_min_epu16(a_.m128i, b_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
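        /* a - sat_u(a - b) == min_u(a, b); avoids _mm_min_epu16, which
         * requires SSE4.1. */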
r_.m128i = _mm_sub_epi16(a_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u16x8_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_u16
#define vminq_u16(a, b) simde_vminq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vminq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE4_1_NATIVE)
r_.m128i = _mm_min_epu32(a_.m128i, b_.m128i);
#elif defined(SIMDE_X86_SSE2_NATIVE)
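      /* SSE2 has no unsigned 32-bit compare or saturating subtract, so
       * emulate _mm_subs_epu32: bias both sides by INT32_MIN to turn the
       * unsigned "did a - b borrow?" test into a signed compare, then keep
       * the wrapped difference only where no borrow occurred.  A zero
       * saturated difference means a <= b, so those lanes take a. */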
const __m128i i32_min = _mm_set1_epi32(INT32_MIN);
const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i);
__m128i m =
_mm_cmpeq_epi32(
            /* _mm_subs_epu32(a_.m128i, b_.m128i) */
_mm_and_si128(
difference,
_mm_xor_si128(
_mm_cmpgt_epi32(
_mm_xor_si128(difference, i32_min),
_mm_xor_si128(a_.m128i, i32_min)
),
_mm_set1_epi32(~INT32_C(0))
)
),
_mm_setzero_si128()
);
r_.m128i =
_mm_or_si128(
_mm_and_si128(m, a_.m128i),
_mm_andnot_si128(m, b_.m128i)
);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u32x4_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminq_u32
#define vminq_u32(a, b) simde_vminq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vminq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_min(a, b);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
}
return simde_uint64x2_from_private(r_);
#endif
}
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MIN_H) */

View File

@@ -0,0 +1,219 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MINNM_H)
#define SIMDE_ARM_NEON_MINNM_H
#include "types.h"
#include "cle.h"
#include "bsl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vminnm_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
return vminnm_f32(a, b);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fminf)
r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]);
#else
if (a_.values[i] < b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] > b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminnm_f32
#define vminnm_f32(a, b) simde_vminnm_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vminnm_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vminnm_f64(a, b);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmin)
r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]);
#else
if (a_.values[i] < b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] > b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminnm_f64
#define vminnm_f64(a, b) simde_vminnm_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vminnmq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
return vminnmq_f32(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS)
return simde_vbslq_f32(simde_vcleq_f32(a, b), a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_min(a, b);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE)
#if !defined(SIMDE_FAST_NANS)
__m128 r = _mm_min_ps(a_.m128, b_.m128);
__m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128);
r = _mm_andnot_ps(bnan, r);
r_.m128 = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan));
#else
r_.m128 = _mm_min_ps(a_.m128, b_.m128);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
r_.v128 = wasm_f32x4_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fminf)
r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]);
#else
if (a_.values[i] < b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] > b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vminnmq_f32
#define vminnmq_f32(a, b) simde_vminnmq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vminnmq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vminnmq_f64(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS)
return simde_vbslq_f64(simde_vcleq_f64(a, b), a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_min(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
#if !defined(SIMDE_FAST_NANS)
__m128d r = _mm_min_pd(a_.m128d, b_.m128d);
__m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d);
r = _mm_andnot_pd(bnan, r);
r_.m128d = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan));
#else
r_.m128d = _mm_min_pd(a_.m128d, b_.m128d);
#endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
r_.v128 = wasm_f64x2_min(a_.v128, b_.v128);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
#if defined(simde_math_fmin)
r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]);
#else
if (a_.values[i] < b_.values[i]) {
r_.values[i] = a_.values[i];
} else if (a_.values[i] > b_.values[i]) {
r_.values[i] = b_.values[i];
} else if (a_.values[i] == a_.values[i]) {
r_.values[i] = a_.values[i];
} else {
r_.values[i] = b_.values[i];
}
#endif
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminnmq_f64
#define vminnmq_f64(a, b) simde_vminnmq_f64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MINNM_H) */

View File

@@ -0,0 +1,424 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MINV_H)
#define SIMDE_ARM_NEON_MINV_H
#include "types.h"
#include <float.h>
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vminv_f32(simde_float32x2_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_f32(a);
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
r = SIMDE_MATH_INFINITYF;
#if defined(SIMDE_FAST_NANS)
SIMDE_VECTORIZE_REDUCTION(min:r)
#else
SIMDE_VECTORIZE
#endif
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
#if defined(SIMDE_FAST_NANS)
r = a_.values[i] < r ? a_.values[i] : r;
#else
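        /* NaN-aware min: both ordered compares fail only when a NaN is
         * involved; take the lane value if it is NaN, otherwise keep the
         * NaN already accumulated in r. */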
r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
#endif
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_f32
#define vminv_f32(v) simde_vminv_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vminv_s8(simde_int8x8_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_s8(a);
#else
simde_int8x8_private a_ = simde_int8x8_to_private(a);
r = INT8_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_s8
#define vminv_s8(v) simde_vminv_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vminv_s16(simde_int16x4_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_s16(a);
#else
simde_int16x4_private a_ = simde_int16x4_to_private(a);
r = INT16_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_s16
#define vminv_s16(v) simde_vminv_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vminv_s32(simde_int32x2_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_s32(a);
#else
simde_int32x2_private a_ = simde_int32x2_to_private(a);
r = INT32_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_s32
#define vminv_s32(v) simde_vminv_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vminv_u8(simde_uint8x8_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_u8(a);
#else
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
r = UINT8_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_u8
#define vminv_u8(v) simde_vminv_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vminv_u16(simde_uint16x4_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_u16(a);
#else
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
r = UINT16_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_u16
#define vminv_u16(v) simde_vminv_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vminv_u32(simde_uint32x2_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminv_u32(a);
#else
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
r = UINT32_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminv_u32
#define vminv_u32(v) simde_vminv_u32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vminvq_f32(simde_float32x4_t a) {
simde_float32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_f32(a);
#else
simde_float32x4_private a_ = simde_float32x4_to_private(a);
r = SIMDE_MATH_INFINITYF;
#if defined(SIMDE_FAST_NANS)
SIMDE_VECTORIZE_REDUCTION(min:r)
#else
SIMDE_VECTORIZE
#endif
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
#if defined(SIMDE_FAST_NANS)
r = a_.values[i] < r ? a_.values[i] : r;
#else
r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
#endif
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_f32
#define vminvq_f32(v) simde_vminvq_f32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vminvq_f64(simde_float64x2_t a) {
simde_float64_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_f64(a);
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
r = SIMDE_MATH_INFINITY;
#if defined(SIMDE_FAST_NANS)
SIMDE_VECTORIZE_REDUCTION(min:r)
#else
SIMDE_VECTORIZE
#endif
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
#if defined(SIMDE_FAST_NANS)
r = a_.values[i] < r ? a_.values[i] : r;
#else
r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
#endif
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_f64
#define vminvq_f64(v) simde_vminvq_f64(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vminvq_s8(simde_int8x16_t a) {
int8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_s8(a);
#else
simde_int8x16_private a_ = simde_int8x16_to_private(a);
r = INT8_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_s8
#define vminvq_s8(v) simde_vminvq_s8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vminvq_s16(simde_int16x8_t a) {
int16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_s16(a);
#else
simde_int16x8_private a_ = simde_int16x8_to_private(a);
r = INT16_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_s16
#define vminvq_s16(v) simde_vminvq_s16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vminvq_s32(simde_int32x4_t a) {
int32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_s32(a);
#else
simde_int32x4_private a_ = simde_int32x4_to_private(a);
r = INT32_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_s32
#define vminvq_s32(v) simde_vminvq_s32(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vminvq_u8(simde_uint8x16_t a) {
uint8_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_u8(a);
#else
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
r = UINT8_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_u8
#define vminvq_u8(v) simde_vminvq_u8(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vminvq_u16(simde_uint16x8_t a) {
uint16_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_u16(a);
#else
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
r = UINT16_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_u16
#define vminvq_u16(v) simde_vminvq_u16(v)
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vminvq_u32(simde_uint32x4_t a) {
uint32_t r;
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r = vminvq_u32(a);
#else
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
r = UINT32_MAX;
SIMDE_VECTORIZE_REDUCTION(min:r)
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
r = a_.values[i] < r ? a_.values[i] : r;
}
#endif
return r;
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vminvq_u32
#define vminvq_u32(v) simde_vminvq_u32(v)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MINV_H) */
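
A minimal usage sketch for the vminv family above, assuming SIMDe's
umbrella header is reachable as <simde/arm/neon.h> (it also provides
the load and dup helpers used here); the values are arbitrary:

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int8_t data[8] = { 5, -3, 7, 0, -3, 9, 2, 1 };
    simde_int8x8_t v = simde_vld1_s8(data);
    /* Horizontal reduction across all lanes: the smallest wins, -3. */
    printf("min = %d\n", simde_vminv_s8(v));

    simde_float32x4_t f = simde_vdupq_n_f32(2.5f);
    /* The float form starts from +infinity; without SIMDE_FAST_NANS
     * the fallback above also propagates NaN lanes. */
    printf("fmin = %f\n", (double)simde_vminvq_f32(f));
    return 0;
}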


@@ -0,0 +1,296 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MLA_H)
#define SIMDE_ARM_NEON_MLA_H
#include "types.h"
#include "add.h"
#include "mul.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_f32(a, b, c);
#else
return simde_vadd_f32(simde_vmul_f32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_f32
#define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmla_f64(a, b, c);
#else
return simde_vadd_f64(simde_vmul_f64(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmla_f64
#define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_s8(a, b, c);
#else
return simde_vadd_s8(simde_vmul_s8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_s8
#define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_s16(a, b, c);
#else
return simde_vadd_s16(simde_vmul_s16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_s16
#define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_s32(a, b, c);
#else
return simde_vadd_s32(simde_vmul_s32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_s32
#define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_u8(a, b, c);
#else
return simde_vadd_u8(simde_vmul_u8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_u8
#define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_u16(a, b, c);
#else
return simde_vadd_u16(simde_vmul_u16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_u16
#define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_u32(a, b, c);
#else
return simde_vadd_u32(simde_vmul_u32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_u32
#define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_f32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_madd(b, c, a);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b),
c_ = simde_float32x4_to_private(c);
r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128);
return simde_float32x4_from_private(r_);
#else
return simde_vaddq_f32(simde_vmulq_f32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_f32
#define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlaq_f64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_madd(b, c, a);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b),
c_ = simde_float64x2_to_private(c);
r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d);
return simde_float64x2_from_private(r_);
#else
return simde_vaddq_f64(simde_vmulq_f64(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlaq_f64
#define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmlaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_s8(a, b, c);
#else
return simde_vaddq_s8(simde_vmulq_s8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_s8
#define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_s16(a, b, c);
#else
return simde_vaddq_s16(simde_vmulq_s16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_s16
#define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_s32(a, b, c);
#else
return simde_vaddq_s32(simde_vmulq_s32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_s32
#define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_u8(a, b, c);
#else
return simde_vaddq_u8(simde_vmulq_u8(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_u8
#define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_u16(a, b, c);
#else
return simde_vaddq_u16(simde_vmulq_u16(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_u16
#define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_u32(a, b, c);
#else
return simde_vaddq_u32(simde_vmulq_u32(b, c), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_u32
#define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLA_H) */
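
A short sketch of the multiply-accumulate semantics, under the same
<simde/arm/neon.h> include assumption: vmla computes a + (b * c)
lane-wise, and the q-form float path above may fuse via _mm_fmadd_ps,
which can differ from separate multiply-then-add by one rounding.

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_float32x4_t a = simde_vdupq_n_f32(1.0f);
    simde_float32x4_t b = simde_vdupq_n_f32(2.0f);
    simde_float32x4_t c = simde_vdupq_n_f32(3.0f);
    /* Every lane becomes 1 + 2*3 = 7. */
    simde_float32x4_t r = simde_vmlaq_f32(a, b, c);
    printf("%f\n", (double)simde_vgetq_lane_f32(r, 0));
    return 0;
}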


@@ -0,0 +1,333 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MLA_N_H)
#define SIMDE_ARM_NEON_MLA_N_H
#include "types.h"
#include "add.h"
#include "mul.h"
#include "mul_n.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmla_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_n_f32(a, b, c);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_n_f32
#define vmla_n_f32(a, b, c) simde_vmla_n_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmla_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_n_s16(a, b, c);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) && !defined(SIMDE_BUG_GCC_100762)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_n_s16
#define vmla_n_s16(a, b, c) simde_vmla_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmla_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_n_s32(a, b, c);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_n_s32
#define vmla_n_s32(a, b, c) simde_vmla_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmla_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_n_u16(a, b, c);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_n_u16
#define vmla_n_u16(a, b, c) simde_vmla_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmla_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmla_n_u32(a, b, c);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmla_n_u32
#define vmla_n_u32(a, b, c) simde_vmla_n_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_n_f32(a, b, c);
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
return simde_vaddq_f32(simde_vmulq_n_f32(b, c), a);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_n_f32
#define vmlaq_n_f32(a, b, c) simde_vmlaq_n_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlaq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_n_s16(a, b, c);
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
return simde_vaddq_s16(simde_vmulq_n_s16(b, c), a);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_n_s16
#define vmlaq_n_s16(a, b, c) simde_vmlaq_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlaq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_n_s32(a, b, c);
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
return simde_vaddq_s32(simde_vmulq_n_s32(b, c), a);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_n_s32
#define vmlaq_n_s32(a, b, c) simde_vmlaq_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlaq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_n_u16(a, b, c);
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
return simde_vaddq_u16(simde_vmulq_n_u16(b, c), a);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_n_u16
#define vmlaq_n_u16(a, b, c) simde_vmlaq_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlaq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_n_u32(a, b, c);
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
return simde_vaddq_u32(simde_vmulq_n_u32(b, c), a);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.values = (b_.values * c) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c) + a_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlaq_n_u32
#define vmlaq_n_u32(a, b, c) simde_vmlaq_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLA_N_H) */
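
The _n forms broadcast a scalar third operand. An illustrative sketch
(same include assumption) showing vmla_n_s16 agreeing with the
dup-then-accumulate spelling:

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int16_t av[4] = { 1, 2, 3, 4 };
    int16_t bv[4] = { 10, 20, 30, 40 };
    simde_int16x4_t a = simde_vld1_s16(av);
    simde_int16x4_t b = simde_vld1_s16(bv);
    /* a + b*3 lane-wise: { 31, 62, 93, 124 }. */
    simde_int16x4_t r  = simde_vmla_n_s16(a, b, 3);
    simde_int16x4_t r2 = simde_vmla_s16(a, b, simde_vdup_n_s16(3));
    printf("%d %d\n", simde_vget_lane_s16(r, 3),
                      simde_vget_lane_s16(r2, 3));
    return 0;
}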


@@ -0,0 +1,156 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MLAL_H)
#define SIMDE_ARM_NEON_MLAL_H
#include "movl.h"
#include "mla.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_s8(a, b, c);
#else
return simde_vmlaq_s16(a, simde_vmovl_s8(b), simde_vmovl_s8(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_s8
#define vmlal_s8(a, b, c) simde_vmlal_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_s16(a, b, c);
#else
return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vmovl_s16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_s16
#define vmlal_s16(a, b, c) simde_vmlal_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_s32(a, b, c);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(simde_vmovl_s32(b)),
c_ = simde_int64x2_to_private(simde_vmovl_s32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_s32
#define vmlal_s32(a, b, c) simde_vmlal_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_u8(a, b, c);
#else
return simde_vmlaq_u16(a, simde_vmovl_u8(b), simde_vmovl_u8(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_u8
#define vmlal_u8(a, b, c) simde_vmlal_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_u16(a, b, c);
#else
return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vmovl_u16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_u16
#define vmlal_u16(a, b, c) simde_vmlal_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_u32(a, b, c);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)),
c_ = simde_uint64x2_to_private(simde_vmovl_u32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_u32
#define vmlal_u32(a, b, c) simde_vmlal_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLAL_H) */
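
vmlal widens both multiplicands before multiplying, so products that
would wrap the narrow element type accumulate exactly. A sketch under
the same include assumption:

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int8_t bv[8] = { 100, 100, 100, 100, 100, 100, 100, 100 };
    simde_int8x8_t b = simde_vld1_s8(bv);
    simde_int16x8_t acc = simde_vdupq_n_s16(0);
    /* 100*100 = 10000 overflows int8_t but fits each widened
     * int16_t accumulator lane. */
    acc = simde_vmlal_s8(acc, b, b);
    printf("%d\n", simde_vgetq_lane_s16(acc, 0));
    return 0;
}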


@@ -0,0 +1,156 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_H)
#define SIMDE_ARM_NEON_MLAL_HIGH_H
#include "movl_high.h"
#include "mla.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_s8(a, b, c);
#else
return simde_vmlaq_s16(a, simde_vmovl_high_s8(b), simde_vmovl_high_s8(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_s8
#define vmlal_high_s8(a, b, c) simde_vmlal_high_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_s16(a, b, c);
#else
return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vmovl_high_s16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_s16
#define vmlal_high_s16(a, b, c) simde_vmlal_high_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_s32(a, b, c);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)),
c_ = simde_int64x2_to_private(simde_vmovl_high_s32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_s32
#define vmlal_high_s32(a, b, c) simde_vmlal_high_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_u8(a, b, c);
#else
return simde_vmlaq_u16(a, simde_vmovl_high_u8(b), simde_vmovl_high_u8(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_u8
#define vmlal_high_u8(a, b, c) simde_vmlal_high_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_u16(a, b, c);
#else
return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vmovl_high_u16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_u16
#define vmlal_high_u16(a, b, c) simde_vmlal_high_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_u32(a, b, c);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)),
c_ = simde_uint64x2_to_private(simde_vmovl_high_u32(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_u32
#define vmlal_high_u32(a, b, c) simde_vmlal_high_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) */
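
The _high forms read only the upper half of their 128-bit inputs; the
sketch below (same include assumption) checks vmlal_high_s16 against
the equivalent vget_high_s16 + vmlal_s16 spelling:

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int16_t bv[8] = { 0, 0, 0, 0, 5, 6, 7, 8 };
    simde_int16x8_t b = simde_vld1q_s16(bv);
    /* Only lanes 4..7 participate: the accumulator gains
     * { 25, 36, 49, 64 }. */
    simde_int32x4_t acc = simde_vmlal_high_s16(simde_vdupq_n_s32(0), b, b);
    simde_int32x4_t acc2 = simde_vmlal_s16(simde_vdupq_n_s32(0),
        simde_vget_high_s16(b), simde_vget_high_s16(b));
    printf("%d %d\n", simde_vgetq_lane_s32(acc, 0),
                      simde_vgetq_lane_s32(acc2, 0));
    return 0;
}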


@@ -0,0 +1,128 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Décio Luiz Gazzoni Filho <decio@decpp.net>
*/
#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H)
#define SIMDE_ARM_NEON_MLAL_HIGH_N_H
#include "movl_high.h"
#include "dup_n.h"
#include "mla.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_n_s16(a, b, c);
#else
return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_n_s16
#define vmlal_high_n_s16(a, b, c) simde_vmlal_high_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_n_s32(a, b, c);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)),
c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_n_s32
#define vmlal_high_n_s32(a, b, c) simde_vmlal_high_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_n_u16(a, b, c);
#else
return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_n_u16
#define vmlal_high_n_u16(a, b, c) simde_vmlal_high_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlal_high_n_u32(a, b, c);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)),
c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_high_n_u32
#define vmlal_high_n_u32(a, b, c) simde_vmlal_high_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) */


@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLAL_LANE_H)
#define SIMDE_ARM_NEON_MLAL_LANE_H
#include "mlal.h"
#include "dup_lane.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlal_lane_s16(a, b, v, lane) vmlal_lane_s16((a), (b), (v), (lane))
#else
#define simde_vmlal_lane_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_lane_s16
#define vmlal_lane_s16(a, b, c, lane) simde_vmlal_lane_s16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlal_lane_s32(a, b, v, lane) vmlal_lane_s32((a), (b), (v), (lane))
#else
#define simde_vmlal_lane_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_lane_s32
#define vmlal_lane_s32(a, b, c, lane) simde_vmlal_lane_s32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlal_lane_u16(a, b, v, lane) vmlal_lane_u16((a), (b), (v), (lane))
#else
#define simde_vmlal_lane_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_lane_u16
#define vmlal_lane_u16(a, b, c, lane) simde_vmlal_lane_u16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlal_lane_u32(a, b, v, lane) vmlal_lane_u32((a), (b), (v), (lane))
#else
#define simde_vmlal_lane_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_lane_u32
#define vmlal_lane_u32(a, b, c, lane) simde_vmlal_lane_u32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlal_laneq_s16(a, b, v, lane) vmlal_laneq_s16((a), (b), (v), (lane))
#else
#define simde_vmlal_laneq_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_laneq_s16
#define vmlal_laneq_s16(a, b, c, lane) simde_vmlal_laneq_s16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlal_laneq_s32(a, b, v, lane) vmlal_laneq_s32((a), (b), (v), (lane))
#else
#define simde_vmlal_laneq_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_laneq_s32
#define vmlal_laneq_s32(a, b, c, lane) simde_vmlal_laneq_s32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlal_laneq_u16(a, b, v, lane) vmlal_laneq_u16((a), (b), (v), (lane))
#else
#define simde_vmlal_laneq_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_laneq_u16
#define vmlal_laneq_u16(a, b, c, lane) simde_vmlal_laneq_u16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlal_laneq_u32(a, b, v, lane) vmlal_laneq_u32((a), (b), (v), (lane))
#else
#define simde_vmlal_laneq_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlal_laneq_u32
#define vmlal_laneq_u32(a, b, c, lane) simde_vmlal_laneq_u32((a), (b), (c), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLAL_LANE_H) */
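
Since these are macros built on vdup_lane, the lane argument has to be
an integer constant expression, as the ARM intrinsics also require. An
illustrative sketch (same include assumption):

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int16_t bv[4] = { 1, 2, 3, 4 };
    int16_t vv[4] = { 10, 20, 30, 40 };
    simde_int16x4_t b = simde_vld1_s16(bv);
    simde_int16x4_t v = simde_vld1_s16(vv);
    /* Lane 2 of v (30) is broadcast: the accumulator becomes
     * { 30, 60, 90, 120 }. The literal 2 may not be a variable. */
    simde_int32x4_t acc =
        simde_vmlal_lane_s16(simde_vdupq_n_s32(0), b, v, 2);
    printf("%d\n", simde_vgetq_lane_s32(acc, 3));
    return 0;
}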


@@ -0,0 +1,128 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLAL_N_H)
#define SIMDE_ARM_NEON_MLAL_N_H
#include "movl.h"
#include "dup_n.h"
#include "mla.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_n_s16(a, b, c);
#else
return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_n_s16
#define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_n_s32(a, b, c);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(simde_vmovl_s32(b)),
c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_n_s32
#define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_n_u16(a, b, c);
#else
return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_n_u16
#define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlal_n_u32(a, b, c);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)),
c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = (b_.values * c_.values) + a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlal_n_u32
#define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */


@@ -0,0 +1,290 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLS_H)
#define SIMDE_ARM_NEON_MLS_H
#include "mul.h"
#include "sub.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmls_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_f32(a, b, c);
#else
return simde_vsub_f32(a, simde_vmul_f32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_f32
#define vmls_f32(a, b, c) simde_vmls_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmls_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmls_f64(a, b, c);
#else
return simde_vsub_f64(a, simde_vmul_f64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmls_f64
#define vmls_f64(a, b, c) simde_vmls_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmls_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_s8(a, b, c);
#else
return simde_vsub_s8(a, simde_vmul_s8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_s8
#define vmls_s8(a, b, c) simde_vmls_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmls_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_s16(a, b, c);
#else
return simde_vsub_s16(a, simde_vmul_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_s16
#define vmls_s16(a, b, c) simde_vmls_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmls_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_s32(a, b, c);
#else
return simde_vsub_s32(a, simde_vmul_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_s32
#define vmls_s32(a, b, c) simde_vmls_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmls_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_u8(a, b, c);
#else
return simde_vsub_u8(a, simde_vmul_u8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_u8
#define vmls_u8(a, b, c) simde_vmls_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmls_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_u16(a, b, c);
#else
return simde_vsub_u16(a, simde_vmul_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_u16
#define vmls_u16(a, b, c) simde_vmls_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmls_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_u32(a, b, c);
#else
return simde_vsub_u32(a, simde_vmul_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_u32
#define vmls_u32(a, b, c) simde_vmls_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_f32(a, b, c);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b),
c_ = simde_float32x4_to_private(c);
r_.m128 = _mm_fnmadd_ps(b_.m128, c_.m128, a_.m128);
return simde_float32x4_from_private(r_);
#else
return simde_vsubq_f32(a, simde_vmulq_f32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_f32
#define vmlsq_f32(a, b, c) simde_vmlsq_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmlsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsq_f64(a, b, c);
#elif defined(SIMDE_X86_FMA_NATIVE)
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b),
c_ = simde_float64x2_to_private(c);
r_.m128d = _mm_fnmadd_pd(b_.m128d, c_.m128d, a_.m128d);
return simde_float64x2_from_private(r_);
#else
return simde_vsubq_f64(a, simde_vmulq_f64(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsq_f64
#define vmlsq_f64(a, b, c) simde_vmlsq_f64((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmlsq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_s8(a, b, c);
#else
return simde_vsubq_s8(a, simde_vmulq_s8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_s8
#define vmlsq_s8(a, b, c) simde_vmlsq_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_s16(a, b, c);
#else
return simde_vsubq_s16(a, simde_vmulq_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_s16
#define vmlsq_s16(a, b, c) simde_vmlsq_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_s32(a, b, c);
#else
return simde_vsubq_s32(a, simde_vmulq_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_s32
#define vmlsq_s32(a, b, c) simde_vmlsq_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmlsq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_u8(a, b, c);
#else
return simde_vsubq_u8(a, simde_vmulq_u8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_u8
#define vmlsq_u8(a, b, c) simde_vmlsq_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_u16(a, b, c);
#else
return simde_vsubq_u16(a, simde_vmulq_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_u16
#define vmlsq_u16(a, b, c) simde_vmlsq_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_u32(a, b, c);
#else
return simde_vsubq_u32(a, simde_vmulq_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_u32
#define vmlsq_u32(a, b, c) simde_vmlsq_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLS_H) */
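
vmls is the subtracting twin, computing a - (b * c); the q-form float
paths above may fuse via _mm_fnmadd when FMA is available. A sketch
under the same include assumption:

#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int32x4_t a = simde_vdupq_n_s32(100);
    simde_int32x4_t b = simde_vdupq_n_s32(6);
    simde_int32x4_t c = simde_vdupq_n_s32(7);
    /* Every lane becomes 100 - 6*7 = 58. */
    simde_int32x4_t r = simde_vmlsq_s32(a, b, c);
    printf("%d\n", simde_vgetq_lane_s32(r, 0));
    return 0;
}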


@@ -0,0 +1,181 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLS_N_H)
#define SIMDE_ARM_NEON_MLS_N_H
#include "sub.h"
#include "dup_n.h"
#include "mls.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmls_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_n_f32(a, b, c);
#else
return simde_vmls_f32(a, b, simde_vdup_n_f32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_n_f32
#define vmls_n_f32(a, b, c) simde_vmls_n_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmls_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_n_s16(a, b, c);
#else
return simde_vmls_s16(a, b, simde_vdup_n_s16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_n_s16
#define vmls_n_s16(a, b, c) simde_vmls_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmls_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_n_s32(a, b, c);
#else
return simde_vmls_s32(a, b, simde_vdup_n_s32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_n_s32
#define vmls_n_s32(a, b, c) simde_vmls_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmls_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_n_u16(a, b, c);
#else
return simde_vmls_u16(a, b, simde_vdup_n_u16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_n_u16
#define vmls_n_u16(a, b, c) simde_vmls_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmls_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmls_n_u32(a, b, c);
#else
return simde_vmls_u32(a, b, simde_vdup_n_u32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmls_n_u32
#define vmls_n_u32(a, b, c) simde_vmls_n_u32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlsq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_n_f32(a, b, c);
#else
return simde_vmlsq_f32(a, b, simde_vdupq_n_f32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_n_f32
#define vmlsq_n_f32(a, b, c) simde_vmlsq_n_f32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_n_s16(a, b, c);
#else
return simde_vmlsq_s16(a, b, simde_vdupq_n_s16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_n_s16
#define vmlsq_n_s16(a, b, c) simde_vmlsq_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_n_s32(a, b, c);
#else
return simde_vmlsq_s32(a, b, simde_vdupq_n_s32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_n_s32
#define vmlsq_n_s32(a, b, c) simde_vmlsq_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_n_u16(a, b, c);
#else
return simde_vmlsq_u16(a, b, simde_vdupq_n_u16(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_n_u16
#define vmlsq_n_u16(a, b, c) simde_vmlsq_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsq_n_u32(a, b, c);
#else
return simde_vmlsq_u32(a, b, simde_vdupq_n_u32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsq_n_u32
#define vmlsq_n_u32(a, b, c) simde_vmlsq_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLS_N_H) */
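/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmls_n computes a - (b * c) lane-wise, with the scalar c broadcast
 * to every lane before the multiply-subtract. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_float32x2_t a = simde_vdup_n_f32(10.0f);
    simde_float32x2_t b = simde_vdup_n_f32(3.0f);
    simde_float32x2_t r = simde_vmls_n_f32(a, b, 2.0f); /* each lane: 10 - 3*2 = 4 */
    printf("%f\n", simde_vget_lane_f32(r, 0));          /* prints 4.000000 */
    return 0;
}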

View File

@@ -0,0 +1,124 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLSL_H)
#define SIMDE_ARM_NEON_MLSL_H
#include "mull.h"
#include "sub.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsl_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_s8(a, b, c);
#else
return simde_vsubq_s16(a, simde_vmull_s8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_s8
#define vmlsl_s8(a, b, c) simde_vmlsl_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_s16(a, b, c);
#else
return simde_vsubq_s32(a, simde_vmull_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_s16
#define vmlsl_s16(a, b, c) simde_vmlsl_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_s32(a, b, c);
#else
return simde_vsubq_s64(a, simde_vmull_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_s32
#define vmlsl_s32(a, b, c) simde_vmlsl_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsl_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_u8(a, b, c);
#else
return simde_vsubq_u16(a, simde_vmull_u8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_u8
#define vmlsl_u8(a, b, c) simde_vmlsl_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_u16(a, b, c);
#else
return simde_vsubq_u32(a, simde_vmull_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_u16
#define vmlsl_u16(a, b, c) simde_vmlsl_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_u32(a, b, c);
#else
return simde_vsubq_u64(a, simde_vmull_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_u32
#define vmlsl_u32(a, b, c) simde_vmlsl_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLSL_H) */
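/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmlsl is a widening multiply-subtract, r = a - (b * c), where the
 * narrow products are computed at double width before subtracting. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int32x4_t a = simde_vdupq_n_s32(100000);
    simde_int16x4_t b = simde_vdup_n_s16(300);
    simde_int16x4_t c = simde_vdup_n_s16(200);
    /* 300 * 200 = 60000 would overflow int16_t; the "long" variant widens
     * the product to 32 bits first: 100000 - 60000 = 40000. */
    simde_int32x4_t r = simde_vmlsl_s16(a, b, c);
    printf("%d\n", simde_vgetq_lane_s32(r, 0)); /* prints 40000 */
    return 0;
}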

View File

@@ -0,0 +1,124 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_H)
#define SIMDE_ARM_NEON_MLSL_HIGH_H
#include "mull_high.h"
#include "sub.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsl_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_s8(a, b, c);
#else
return simde_vsubq_s16(a, simde_vmull_high_s8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_s8
#define vmlsl_high_s8(a, b, c) simde_vmlsl_high_s8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_s16(a, b, c);
#else
return simde_vsubq_s32(a, simde_vmull_high_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_s16
#define vmlsl_high_s16(a, b, c) simde_vmlsl_high_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_s32(a, b, c);
#else
return simde_vsubq_s64(a, simde_vmull_high_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_s32
#define vmlsl_high_s32(a, b, c) simde_vmlsl_high_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsl_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_u8(a, b, c);
#else
return simde_vsubq_u16(a, simde_vmull_high_u8(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_u8
#define vmlsl_high_u8(a, b, c) simde_vmlsl_high_u8((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_u16(a, b, c);
#else
return simde_vsubq_u32(a, simde_vmull_high_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_u16
#define vmlsl_high_u16(a, b, c) simde_vmlsl_high_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_u32(a, b, c);
#else
return simde_vsubq_u64(a, simde_vmull_high_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_u32
#define vmlsl_high_u32(a, b, c) simde_vmlsl_high_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) */
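/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): the _high variants read only the upper halves of the full-width b
 * and c vectors before widening, multiplying, and subtracting. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    /* Lanes 0..3 hold 1, lanes 4..7 hold 5; only the 5s participate. */
    simde_int16x8_t b = simde_vcombine_s16(simde_vdup_n_s16(1), simde_vdup_n_s16(5));
    simde_int32x4_t a = simde_vdupq_n_s32(100);
    simde_int32x4_t r = simde_vmlsl_high_s16(a, b, b); /* 100 - 5*5 = 75 per lane */
    printf("%d\n", simde_vgetq_lane_s32(r, 0));        /* prints 75 */
    return 0;
}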

View File

@@ -0,0 +1,128 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Décio Luiz Gazzoni Filho <decio@decpp.net>
*/
#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H)
#define SIMDE_ARM_NEON_MLSL_HIGH_N_H
#include "movl_high.h"
#include "dup_n.h"
#include "mls.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_n_s16(a, b, c);
#else
return simde_vmlsq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_n_s16
#define vmlsl_high_n_s16(a, b, c) simde_vmlsl_high_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_n_s32(a, b, c);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)),
c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values - (b_.values * c_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] - (b_.values[i] * c_.values[i]);
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_n_s32
#define vmlsl_high_n_s32(a, b, c) simde_vmlsl_high_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_n_u16(a, b, c);
#else
return simde_vmlsq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_n_u16
#define vmlsl_high_n_u16(a, b, c) simde_vmlsl_high_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmlsl_high_n_u32(a, b, c);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)),
c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values - (b_.values * c_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] - (b_.values[i] * c_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_high_n_u32
#define vmlsl_high_n_u32(a, b, c) simde_vmlsl_high_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H) */
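/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): the _high_n variants widen the upper half of b and multiply it by a
 * broadcast scalar before subtracting from a. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int16x8_t b = simde_vcombine_s16(simde_vdup_n_s16(1), simde_vdup_n_s16(4));
    simde_int32x4_t a = simde_vdupq_n_s32(50);
    /* Only the high half of b (all 4s) is widened and multiplied by 10. */
    simde_int32x4_t r = simde_vmlsl_high_n_s16(a, b, 10); /* 50 - 4*10 = 10 */
    printf("%d\n", simde_vgetq_lane_s32(r, 0));           /* prints 10 */
    return 0;
}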

View File

@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLSL_LANE_H)
#define SIMDE_ARM_NEON_MLSL_LANE_H
#include "mlsl.h"
#include "dup_lane.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlsl_lane_s16(a, b, v, lane) vmlsl_lane_s16((a), (b), (v), (lane))
#else
#define simde_vmlsl_lane_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_lane_s16
#define vmlsl_lane_s16(a, b, c, lane) simde_vmlsl_lane_s16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlsl_lane_s32(a, b, v, lane) vmlsl_lane_s32((a), (b), (v), (lane))
#else
#define simde_vmlsl_lane_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_lane_s32
#define vmlsl_lane_s32(a, b, c, lane) simde_vmlsl_lane_s32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlsl_lane_u16(a, b, v, lane) vmlsl_lane_u16((a), (b), (v), (lane))
#else
#define simde_vmlsl_lane_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_lane_u16
#define vmlsl_lane_u16(a, b, c, lane) simde_vmlsl_lane_u16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmlsl_lane_u32(a, b, v, lane) vmlsl_lane_u32((a), (b), (v), (lane))
#else
#define simde_vmlsl_lane_u32(a, b, v, lane) simde_vmlsl_u32((a), (b), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_lane_u32
#define vmlsl_lane_u32(a, b, c, lane) simde_vmlsl_lane_u32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlsl_laneq_s16(a, b, v, lane) vmlsl_laneq_s16((a), (b), (v), (lane))
#else
#define simde_vmlsl_laneq_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_laneq_s16
#define vmlsl_laneq_s16(a, b, c, lane) simde_vmlsl_laneq_s16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlsl_laneq_s32(a, b, v, lane) vmlsl_laneq_s32((a), (b), (v), (lane))
#else
#define simde_vmlsl_laneq_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_laneq_s32
#define vmlsl_laneq_s32(a, b, c, lane) simde_vmlsl_laneq_s32((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlsl_laneq_u16(a, b, v, lane) vmlsl_laneq_u16((a), (b), (v), (lane))
#else
#define simde_vmlsl_laneq_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_laneq_u16
#define vmlsl_laneq_u16(a, b, c, lane) simde_vmlsl_laneq_u16((a), (b), (c), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmlsl_laneq_u32(a, b, v, lane) vmlsl_laneq_u32((a), (b), (v), (lane))
#else
#define simde_vmlsl_laneq_u32(a, b, v, lane) simde_vmlsl_u32((a), (b), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmlsl_laneq_u32
#define vmlsl_laneq_u32(a, b, c, lane) simde_vmlsl_laneq_u32((a), (b), (c), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLSL_LANE_H) */
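/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): the _lane variants pick one element of v as the scalar multiplier.
 * These are macros because the lane index must be a compile-time constant. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int32x4_t a = simde_vdupq_n_s32(100);
    simde_int16x4_t b = simde_vdup_n_s16(3);
    int16_t v_vals[4] = { 1, 2, 7, 9 };
    simde_int16x4_t v = simde_vld1_s16(v_vals);
    simde_int32x4_t r = simde_vmlsl_lane_s16(a, b, v, 2); /* 100 - 3*7 = 79 */
    printf("%d\n", simde_vgetq_lane_s32(r, 0));           /* prints 79 */
    return 0;
}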

View File

@@ -0,0 +1,96 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MLSL_N_H)
#define SIMDE_ARM_NEON_MLSL_N_H
#include "mull_n.h"
#include "sub.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_n_s16(a, b, c);
#else
return simde_vsubq_s32(a, simde_vmull_n_s16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_n_s16
#define vmlsl_n_s16(a, b, c) simde_vmlsl_n_s16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_n_s32(a, b, c);
#else
return simde_vsubq_s64(a, simde_vmull_n_s32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_n_s32
#define vmlsl_n_s32(a, b, c) simde_vmlsl_n_s32((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_n_u16(a, b, c);
#else
return simde_vsubq_u32(a, simde_vmull_n_u16(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_n_u16
#define vmlsl_n_u16(a, b, c) simde_vmlsl_n_u16((a), (b), (c))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlsl_n_u32(a, b, c);
#else
return simde_vsubq_u64(a, simde_vmull_n_u32(b, c));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmlsl_n_u32
#define vmlsl_n_u32(a, b, c) simde_vmlsl_n_u32((a), (b), (c))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MLSL_N_H) */
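/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmlsl_n combines the widening subtract with a broadcast scalar,
 * i.e. r = a - widen(b) * c. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_uint32x4_t a = simde_vdupq_n_u32(1000);
    simde_uint16x4_t b = simde_vdup_n_u16(30);
    simde_uint32x4_t r = simde_vmlsl_n_u16(a, b, 20); /* 1000 - 30*20 = 400 */
    printf("%u\n", simde_vgetq_lane_u32(r, 0));       /* prints 400 */
    return 0;
}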

View File

@@ -0,0 +1,238 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MOVL_H)
#define SIMDE_ARM_NEON_MOVL_H
#include "combine.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmovl_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_s8(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int16x8_private r_;
simde_int8x16_private a_ = simde_int8x16_to_private(simde_vcombine_s8(a, a));
r_.v128 = wasm_i16x8_extend_low_i8x16(a_.v128);
return simde_int16x8_from_private(r_);
#else
simde_int16x8_private r_;
simde_int8x8_private a_ = simde_int8x8_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_s8
#define vmovl_s8(a) simde_vmovl_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmovl_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_s16(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int32x4_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(simde_vcombine_s16(a, a));
r_.v128 = wasm_i32x4_extend_low_i16x8(a_.v128);
return simde_int32x4_from_private(r_);
#else
simde_int32x4_private r_;
simde_int16x4_private a_ = simde_int16x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_s16
#define vmovl_s16(a) simde_vmovl_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmovl_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_s32(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_int64x2_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(simde_vcombine_s32(a, a));
r_.v128 = wasm_i64x2_extend_low_i32x4(a_.v128);
return simde_int64x2_from_private(r_);
#else
simde_int64x2_private r_;
simde_int32x2_private a_ = simde_int32x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]);
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_s32
#define vmovl_s32(a) simde_vmovl_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmovl_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_u8(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint16x8_private r_;
simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a));
r_.v128 = wasm_u16x8_extend_low_u8x16(a_.v128);
return simde_uint16x8_from_private(r_);
#else
simde_uint16x8_private r_;
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_u8
#define vmovl_u8(a) simde_vmovl_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmovl_u16(simde_uint16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_u16(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint32x4_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vcombine_u16(a, a));
r_.v128 = wasm_u32x4_extend_low_u16x8(a_.v128);
return simde_uint32x4_from_private(r_);
#else
simde_uint32x4_private r_;
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_u16
#define vmovl_u16(a) simde_vmovl_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmovl_u32(simde_uint32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovl_u32(a);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
simde_uint64x2_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vcombine_u32(a, a));
r_.v128 = wasm_u64x2_extend_low_u32x4(a_.v128);
return simde_uint64x2_from_private(r_);
#else
simde_uint64x2_private r_;
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovl_u32
#define vmovl_u32(a) simde_vmovl_u32((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MOVL_H) */
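/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmovl widens each lane to twice its width, sign-extending for the
 * signed variants and zero-extending for the unsigned ones. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int8_t vals[8] = { -1, 2, -3, 4, -5, 6, -7, 8 };
    simde_int8x8_t a = simde_vld1_s8(vals);
    simde_int16x8_t r = simde_vmovl_s8(a); /* each int8_t lane becomes int16_t */
    printf("%d %d\n", simde_vgetq_lane_s16(r, 0), simde_vgetq_lane_s16(r, 7));
    /* prints: -1 8 */
    return 0;
}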

View File

@@ -0,0 +1,126 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MOVL_HIGH_H)
#define SIMDE_ARM_NEON_MOVL_HIGH_H
#include "types.h"
#include "movl.h"
#include "get_high.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmovl_high_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_s8(a);
#else
return simde_vmovl_s8(simde_vget_high_s8(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_s8
#define vmovl_high_s8(a) simde_vmovl_high_s8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmovl_high_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_s16(a);
#else
return simde_vmovl_s16(simde_vget_high_s16(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_s16
#define vmovl_high_s16(a) simde_vmovl_high_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmovl_high_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_s32(a);
#else
return simde_vmovl_s32(simde_vget_high_s32(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_s32
#define vmovl_high_s32(a) simde_vmovl_high_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmovl_high_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_u8(a);
#else
return simde_vmovl_u8(simde_vget_high_u8(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_u8
#define vmovl_high_u8(a) simde_vmovl_high_u8((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmovl_high_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_u16(a);
#else
return simde_vmovl_u16(simde_vget_high_u16(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_u16
#define vmovl_high_u16(a) simde_vmovl_high_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmovl_high_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovl_high_u32(a);
#else
return simde_vmovl_u32(simde_vget_high_u32(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovl_high_u32
#define vmovl_high_u32(a) simde_vmovl_high_u32((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) */
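/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmovl_high widens only the upper half of a 128-bit vector, the
 * AArch64 counterpart of vmovl(vget_high(a)). */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    int8_t vals[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
                        8, 9, 10, 11, 12, 13, 14, 15 };
    simde_int8x16_t a = simde_vld1q_s8(vals);
    simde_int16x8_t r = simde_vmovl_high_s8(a); /* only lanes 8..15 are widened */
    printf("%d\n", simde_vgetq_lane_s16(r, 0)); /* prints 8 */
    return 0;
}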

View File

@@ -0,0 +1,195 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MOVN_H)
#define SIMDE_ARM_NEON_MOVN_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmovn_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_s16(a);
#else
simde_int8x8_private r_;
simde_int16x8_private a_ = simde_int16x8_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i]);
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_s16
#define vmovn_s16(a) simde_vmovn_s16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmovn_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_s32(a);
#else
simde_int16x4_private r_;
simde_int32x4_private a_ = simde_int32x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]);
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_s32
#define vmovn_s32(a) simde_vmovn_s32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmovn_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_s64(a);
#else
simde_int32x2_private r_;
simde_int64x2_private a_ = simde_int64x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]);
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_s64
#define vmovn_s64(a) simde_vmovn_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmovn_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_u16(a);
#else
simde_uint8x8_private r_;
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i]);
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_u16
#define vmovn_u16(a) simde_vmovn_u16((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmovn_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_u32(a);
#else
simde_uint16x4_private r_;
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]);
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_u32
#define vmovn_u32(a) simde_vmovn_u32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmovn_u64(simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmovn_u64(a);
#else
simde_uint32x2_private r_;
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_)
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmovn_u64
#define vmovn_u64(a) simde_vmovn_u64((a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MOVN_H) */
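/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmovn narrows each lane to half its width by plain truncation; no
 * saturation is performed (use vqmovn for that). */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int16x8_t a = simde_vdupq_n_s16(0x1234);
    simde_int8x8_t r = simde_vmovn_s16(a);      /* keeps the low 8 bits: 0x34 */
    printf("0x%x\n", simde_vget_lane_s8(r, 0)); /* prints 0x34 */
    return 0;
}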

View File

@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MOVN_HIGH_H)
#define SIMDE_ARM_NEON_MOVN_HIGH_H
#include "types.h"
#include "movn.h"
#include "combine.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_s16(r, a);
#else
return simde_vcombine_s8(r, simde_vmovn_s16(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_s16
#define vmovn_high_s16(r, a) simde_vmovn_high_s16((r), (a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_s32(r, a);
#else
return simde_vcombine_s16(r, simde_vmovn_s32(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_s32
#define vmovn_high_s32(r, a) simde_vmovn_high_s32((r), (a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_s64(r, a);
#else
return simde_vcombine_s32(r, simde_vmovn_s64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_s64
#define vmovn_high_s64(r, a) simde_vmovn_high_s64((r), (a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_u16(r, a);
#else
return simde_vcombine_u8(r, simde_vmovn_u16(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_u16
#define vmovn_high_u16(r, a) simde_vmovn_high_u16((r), (a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_u32(r, a);
#else
return simde_vcombine_u16(r, simde_vmovn_u32(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_u32
#define vmovn_high_u32(r, a) simde_vmovn_high_u32((r), (a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmovn_high_u64(r, a);
#else
return simde_vcombine_u32(r, simde_vmovn_u64(a));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmovn_high_u64
#define vmovn_high_u64(r, a) simde_vmovn_high_u64((r), (a))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) */
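/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmovn_high narrows a into the upper half of the result while the
 * first operand supplies the lower half, mirroring vcombine(r, vmovn(a)). */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_int8x8_t low = simde_vdup_n_s8(1);
    simde_int16x8_t a  = simde_vdupq_n_s16(2);
    simde_int8x16_t r = simde_vmovn_high_s16(low, a);
    printf("%d %d\n", simde_vgetq_lane_s8(r, 0), simde_vgetq_lane_s8(r, 8));
    /* prints: 1 2 -- lanes 0..7 come from `low`, lanes 8..15 from narrowed `a` */
    return 0;
}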

View File

@@ -0,0 +1,579 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MUL_H)
#define SIMDE_ARM_NEON_MUL_H
#include "types.h"
#include "reinterpret.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_f32(a, b);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_f32
#define vmul_f32(a, b) simde_vmul_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_f64(simde_float64x1_t a, simde_float64x1_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmul_f64(a, b);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_f64
#define vmul_f64(a, b) simde_vmul_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmul_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_s8(a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_s8
#define vmul_s8(a, b) simde_vmul_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_s16(a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _m_pmullw(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_s16
#define vmul_s16(a, b) simde_vmul_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_s32(a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_s32
#define vmul_s32(a, b) simde_vmul_s32((a), (b))
#endif
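/* The simde_x_ prefix below marks a SIMDe extension with no NEON equivalent:
 * AArch32/AArch64 NEON provides no 64-bit element multiply, so this helper
 * exists for portable reuse by other SIMDe code. */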
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_x_vmul_s64(simde_int64x1_t a, simde_int64x1_t b) {
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmul_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_u8
#define vmul_u8(a, b) simde_vmul_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_u16
#define vmul_u16(a, b) simde_vmul_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_u32
#define vmul_u32(a, b) simde_vmul_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_x_vmul_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
}
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_f32(a, b);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
#if defined(SIMDE_X86_SSE_NATIVE)
r_.m128 = _mm_mul_ps(a_.m128, b_.m128);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_mul(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_f32
#define vmulq_f32(a, b) simde_vmulq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmulq_f64(a, b);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128d = _mm_mul_pd(a_.m128d, b_.m128d);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_mul(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_f64
#define vmulq_f64(a, b) simde_vmulq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmulq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_mul(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
/* SSE2 has no 8-bit multiply, so multiply the even and odd bytes
 * separately as 16-bit lanes and stitch the low byte of each product
 * back together (https://stackoverflow.com/a/29155682/501126). */
const __m128i dst_even = _mm_mullo_epi16(a_.m128i, b_.m128i);
r_.m128i =
_mm_or_si128(
_mm_slli_epi16(
_mm_mullo_epi16(
_mm_srli_epi16(a_.m128i, 8),
_mm_srli_epi16(b_.m128i, 8)
),
8
),
#if defined(SIMDE_X86_AVX2_NATIVE)
_mm_and_si128(dst_even, _mm_set1_epi16(0xFF))
#else
_mm_srli_epi16(
_mm_slli_epi16(dst_even, 8),
8
)
#endif
);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_s8
#define vmulq_s8(a, b) simde_vmulq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_s16(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_mullo_epi16(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_s16
#define vmulq_s16(a, b) simde_vmulq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_s32(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_mul(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_s32
#define vmulq_s32(a, b) simde_vmulq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vmulq_s64(simde_int64x2_t a, simde_int64x2_t b) {
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_mul(a_.v128, b_.v128);
#elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE)
r_.m128i = _mm_mullo_epi64(a_.m128i, b_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values * b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
}
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmulq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_u8(a, b);
#else
return
simde_vreinterpretq_u8_s8(
simde_vmulq_s8(
simde_vreinterpretq_s8_u8(a),
simde_vreinterpretq_s8_u8(b)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_u8
#define vmulq_u8(a, b) simde_vmulq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_u16(a, b);
#else
return
simde_vreinterpretq_u16_s16(
simde_vmulq_s16(
simde_vreinterpretq_s16_u16(a),
simde_vreinterpretq_s16_u16(b)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_u16
#define vmulq_u16(a, b) simde_vmulq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_u32(a, b);
#else
return
simde_vreinterpretq_u32_s32(
simde_vmulq_s32(
simde_vreinterpretq_s32_u32(a),
simde_vreinterpretq_s32_u32(b)
)
);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_u32
#define vmulq_u32(a, b) simde_vmulq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vmulq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
return
simde_vreinterpretq_u64_s64(
simde_x_vmulq_s64(
simde_vreinterpretq_s64_u64(a),
simde_vreinterpretq_s64_u64(b)
)
);
}
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MUL_H) */
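/* Usage sketch (illustrative, assuming <simde/arm/neon.h> is on the include
 * path): vmul is a plain lane-wise multiply; on x86 the float variants map
 * directly to _mm_mul_ps/_mm_mul_pd as shown above. */
#include <simde/arm/neon.h>
#include <stdio.h>

int main(void) {
    simde_float32x4_t a = simde_vdupq_n_f32(1.5f);
    simde_float32x4_t b = simde_vdupq_n_f32(4.0f);
    simde_float32x4_t r = simde_vmulq_f32(a, b); /* each lane: 1.5 * 4 = 6 */
    printf("%f\n", simde_vgetq_lane_f32(r, 0));  /* prints 6.000000 */
    return 0;
}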

View File

@@ -0,0 +1,695 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MUL_LANE_H)
#define SIMDE_ARM_NEON_MUL_LANE_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vmuld_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
return a * simde_float64x1_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vmuld_lane_f64(a, b, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_lane_f64(a, b, lane))
#else
#define simde_vmuld_lane_f64(a, b, lane) vmuld_lane_f64((a), (b), (lane))
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmuld_lane_f64
#define vmuld_lane_f64(a, b, lane) simde_vmuld_lane_f64(a, b, lane)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vmuld_laneq_f64(simde_float64_t a, simde_float64x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
return a * simde_float64x2_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vmuld_laneq_f64(a, b, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_laneq_f64(a, b, lane))
#else
#define simde_vmuld_laneq_f64(a, b, lane) vmuld_laneq_f64((a), (b), (lane))
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmuld_laneq_f64
#define vmuld_laneq_f64(a, b, lane) simde_vmuld_laneq_f64(a, b, lane)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmuls_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
return a * simde_float32x2_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vmuls_lane_f32(a, b, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_lane_f32(a, b, lane))
#else
#define simde_vmuls_lane_f32(a, b, lane) vmuls_lane_f32((a), (b), (lane))
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmuls_lane_f32
#define vmuls_lane_f32(a, b, lane) simde_vmuls_lane_f32(a, b, lane)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmuls_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
return a * simde_float32x4_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vmuls_laneq_f32(a, b, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_laneq_f32(a, b, lane))
#else
#define simde_vmuls_laneq_f32(a, b, lane) vmuls_laneq_f32((a), (b), (lane))
#endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmuls_laneq_f32
#define vmuls_laneq_f32(a, b, lane) simde_vmuls_laneq_f32(a, b, lane)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a),
b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_f32
#define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a),
b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float64x1_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_f64
#define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_s16
#define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_s32
#define vmul_lane_s32(a, b, lane) simde_vmul_lane_s32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_u16
#define vmul_lane_u16(a, b, lane) simde_vmul_lane_u16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_lane_u32
#define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_laneq_s16(simde_int16x4_t a, simde_int16x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a);
simde_int16x8_private
b_ = simde_int16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_s16(a, b, lane) vmul_laneq_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_s16
#define vmul_laneq_s16(a, b, lane) simde_vmul_laneq_s16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_laneq_s32(simde_int32x2_t a, simde_int32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a);
simde_int32x4_private
b_ = simde_int32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_s32(a, b, lane) vmul_laneq_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_s32
#define vmul_laneq_s32(a, b, lane) simde_vmul_laneq_s32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_laneq_u16(simde_uint16x4_t a, simde_uint16x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a);
simde_uint16x8_private
b_ = simde_uint16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_u16(a, b, lane) vmul_laneq_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_u16
#define vmul_laneq_u16(a, b, lane) simde_vmul_laneq_u16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_laneq_u32(simde_uint32x2_t a, simde_uint32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a);
simde_uint32x4_private
b_ = simde_uint32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_u32(a, b, lane) vmul_laneq_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_u32
#define vmul_laneq_u32(a, b, lane) simde_vmul_laneq_u32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a);
simde_float32x2_private b_ = simde_float32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_f32
#define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a);
simde_float64x1_private b_ = simde_float64x1_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float64x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_f64
#define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a);
simde_int16x4_private b_ = simde_int16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_s16
#define vmulq_lane_s16(a, b, lane) simde_vmulq_lane_s16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a);
simde_int32x2_private b_ = simde_int32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_s32
#define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a);
simde_uint16x4_private b_ = simde_uint16x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_u16
#define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a);
simde_uint32x2_private b_ = simde_uint32x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_lane_u32
#define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_f32
#define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float64x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_f64
#define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_s16(a, b, lane) vmulq_laneq_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_s16
#define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_int32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_s32
#define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_u16
#define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_laneq_u32
#define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a);
simde_float32x4_private b_ = simde_float32x4_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_f32(a, b, lane) vmul_laneq_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_f32
#define vmul_laneq_f32(a, b, lane) simde_vmul_laneq_f32((a), (b), (lane))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_laneq_f64(simde_float64x1_t a, simde_float64x2_t b, const int lane)
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a);
simde_float64x2_private b_ = simde_float64x2_to_private(b);
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] * b_.values[lane];
}
return simde_float64x1_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmul_laneq_f64(a, b, lane) vmul_laneq_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_laneq_f64
#define vmul_laneq_f64(a, b, lane) simde_vmul_laneq_f64((a), (b), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MUL_LANE_H) */
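As a quick illustration of the lane forms above, every element of the first vector is scaled by a single broadcast lane of the second; the sketch is illustrative only and assumes the simde NEON headers are available:

#include <assert.h>
#include <simde/arm/neon.h>

static void mul_lane_demo(void) {
  float av[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  float bv[2] = { 10.0f, 100.0f };
  simde_float32x4_t a = simde_vld1q_f32(av);
  simde_float32x2_t b = simde_vld1_f32(bv);
  simde_float32x4_t r = simde_vmulq_lane_f32(a, b, 1); /* every lane of a times b[1] */
  float out[4];
  simde_vst1q_f32(out, r);
  assert(out[0] == 100.0f && out[3] == 400.0f);
}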

View File

@@ -0,0 +1,210 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MUL_N_H)
#define SIMDE_ARM_NEON_MUL_N_H
#include "types.h"
#include "mul.h"
#include "dup_n.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_n_f32(simde_float32x2_t a, simde_float32 b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_n_f32(a, b);
#else
return simde_vmul_f32(a, simde_vdup_n_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_n_f32
#define vmul_n_f32(a, b) simde_vmul_n_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_n_f64(simde_float64x1_t a, simde_float64 b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmul_n_f64(a, b);
#else
return simde_vmul_f64(a, simde_vdup_n_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmul_n_f64
#define vmul_n_f64(a, b) simde_vmul_n_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_n_s16(simde_int16x4_t a, int16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_n_s16(a, b);
#else
return simde_vmul_s16(a, simde_vdup_n_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_n_s16
#define vmul_n_s16(a, b) simde_vmul_n_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_n_s32(simde_int32x2_t a, int32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_n_s32(a, b);
#else
return simde_vmul_s32(a, simde_vdup_n_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_n_s32
#define vmul_n_s32(a, b) simde_vmul_n_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_n_u16(simde_uint16x4_t a, uint16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_n_u16(a, b);
#else
return simde_vmul_u16(a, simde_vdup_n_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_n_u16
#define vmul_n_u16(a, b) simde_vmul_n_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_n_u32(simde_uint32x2_t a, uint32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmul_n_u32(a, b);
#else
return simde_vmul_u32(a, simde_vdup_n_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmul_n_u32
#define vmul_n_u32(a, b) simde_vmul_n_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_n_f32(simde_float32x4_t a, simde_float32 b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_n_f32(a, b);
#else
return simde_vmulq_f32(a, simde_vdupq_n_f32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_f32
#define vmulq_n_f32(a, b) simde_vmulq_n_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_n_f64(simde_float64x2_t a, simde_float64 b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmulq_n_f64(a, b);
#else
return simde_vmulq_f64(a, simde_vdupq_n_f64(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_f64
#define vmulq_n_f64(a, b) simde_vmulq_n_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_n_s16(simde_int16x8_t a, int16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_n_s16(a, b);
#else
return simde_vmulq_s16(a, simde_vdupq_n_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_s16
#define vmulq_n_s16(a, b) simde_vmulq_n_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_n_s32(simde_int32x4_t a, int32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_n_s32(a, b);
#else
return simde_vmulq_s32(a, simde_vdupq_n_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_s32
#define vmulq_n_s32(a, b) simde_vmulq_n_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_n_u16(simde_uint16x8_t a, uint16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_n_u16(a, b);
#else
return simde_vmulq_u16(a, simde_vdupq_n_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_u16
#define vmulq_n_u16(a, b) simde_vmulq_n_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_n_u32(simde_uint32x4_t a, uint32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmulq_n_u32(a, b);
#else
return simde_vmulq_u32(a, simde_vdupq_n_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmulq_n_u32
#define vmulq_n_u32(a, b) simde_vmulq_n_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MUL_N_H) */
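Since the by-scalar forms are simply vdup_n followed by vmul, callers can treat them as a broadcast multiply; a hedged sketch (the helper name scale_by_gain is invented for illustration):

#include <stdint.h>
#include <simde/arm/neon.h>

static simde_int16x8_t scale_by_gain(simde_int16x8_t samples, int16_t gain) {
  /* Equivalent to simde_vmulq_s16(samples, simde_vdupq_n_s16(gain)),
     which is exactly how the portable path above is built. */
  return simde_vmulq_n_s16(samples, gain);
}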

View File

@@ -0,0 +1,236 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MULL_H)
#define SIMDE_ARM_NEON_MULL_H
#include "types.h"
#include "mul.h"
#include "movl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmull_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_s8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b));
#else
simde_int16x8_private r_;
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) * HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_s8
#define vmull_s8(a, b) simde_vmull_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b));
#else
simde_int32x4_private r_;
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b_.values[i]);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_s16
#define vmull_s16(a, b) simde_vmull_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_s32(a, b);
#else
simde_int64x2_private r_;
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_s32
#define vmull_s32(a, b) simde_vmull_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmull_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_u8(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b));
#else
simde_uint16x8_private r_;
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.values[i]);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_u8
#define vmull_u8(a, b) simde_vmull_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_u16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b));
#else
simde_uint32x4_private r_;
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_u16
#define vmull_u16(a, b) simde_vmull_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmull_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_u32(a, b);
#else
simde_uint64x2_private r_;
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
__typeof__(r_.values) av, bv;
SIMDE_CONVERT_VECTOR_(av, a_.values);
SIMDE_CONVERT_VECTOR_(bv, b_.values);
r_.values = av * bv;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.values[i]);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_u32
#define vmull_u32(a, b) simde_vmull_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MULL_H) */
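The widening multiplies keep every product at full precision; an illustrative four-lane dot product built on them (helper name invented; simde_vaddvq_s32 comes from addv.h, which the umbrella NEON header pulls in):

#include <stdint.h>
#include <simde/arm/neon.h>

static int32_t dot4_s16(const int16_t x[4], const int16_t y[4]) {
  /* vmull widens before multiplying, so the 16x16 products are held
     exactly in 32 bits instead of wrapping. */
  simde_int32x4_t p = simde_vmull_s16(simde_vld1_s16(x), simde_vld1_s16(y));
  return simde_vaddvq_s32(p); /* horizontal sum of the four products */
}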

View File

@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MULL_HIGH_H)
#define SIMDE_ARM_NEON_MULL_HIGH_H
#include "types.h"
#include "mul.h"
#include "movl_high.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmull_high_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_s8(a, b);
#else
return simde_vmulq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_s8
#define vmull_high_s8(a, b) simde_vmull_high_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_high_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_s16(a, b);
#else
return simde_vmulq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_s16
#define vmull_high_s16(a, b) simde_vmull_high_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_high_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_s32(a, b);
#else
return simde_x_vmulq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_s32
#define vmull_high_s32(a, b) simde_vmull_high_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmull_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_u8(a, b);
#else
return simde_vmulq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_u8
#define vmull_high_u8(a, b) simde_vmull_high_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_u16(a, b);
#else
return simde_vmulq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_u16
#define vmull_high_u16(a, b) simde_vmull_high_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmull_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vmull_high_u32(a, b);
#else
return simde_x_vmulq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_high_u32
#define vmull_high_u32(a, b) simde_vmull_high_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MULL_HIGH_H) */
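The vmull_high_* forms pair naturally with plain vmull_* on the low half; an illustrative helper (names invented) that yields all eight widened products:

#include <simde/arm/neon.h>

static void mull_both_halves(simde_int16x8_t a, simde_int16x8_t b,
                             simde_int32x4_t *lo, simde_int32x4_t *hi) {
  *lo = simde_vmull_s16(simde_vget_low_s16(a), simde_vget_low_s16(b));
  *hi = simde_vmull_high_s16(a, b); /* upper four lanes, widened */
}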

View File

@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2021 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_MULL_LANE_H)
#define SIMDE_ARM_NEON_MULL_LANE_H
#include "mull.h"
#include "dup_lane.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmull_lane_s16(a, v, lane) vmull_lane_s16((a), (v), (lane))
#else
#define simde_vmull_lane_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_lane_s16
#define vmull_lane_s16(a, v, lane) simde_vmull_lane_s16((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmull_lane_s32(a, v, lane) vmull_lane_s32((a), (v), (lane))
#else
#define simde_vmull_lane_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_lane_s32
#define vmull_lane_s32(a, v, lane) simde_vmull_lane_s32((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmull_lane_u16(a, v, lane) vmull_lane_u16((a), (v), (lane))
#else
#define simde_vmull_lane_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_lane_u16
#define vmull_lane_u16(a, v, lane) simde_vmull_lane_u16((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define simde_vmull_lane_u32(a, v, lane) vmull_lane_u32((a), (v), (lane))
#else
#define simde_vmull_lane_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_lane_u32
#define vmull_lane_u32(a, v, lane) simde_vmull_lane_u32((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmull_laneq_s16(a, v, lane) vmull_laneq_s16((a), (v), (lane))
#else
#define simde_vmull_laneq_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_laneq_s16
#define vmull_laneq_s16(a, v, lane) simde_vmull_laneq_s16((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmull_laneq_s32(a, v, lane) vmull_laneq_s32((a), (v), (lane))
#else
#define simde_vmull_laneq_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_laneq_s32
#define vmull_laneq_s32(a, v, lane) simde_vmull_laneq_s32((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmull_laneq_u16(a, v, lane) vmull_laneq_u16((a), (v), (lane))
#else
#define simde_vmull_laneq_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_laneq_u16
#define vmull_laneq_u16(a, v, lane) simde_vmull_laneq_u16((a), (v), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vmull_laneq_u32(a, v, lane) vmull_laneq_u32((a), (v), (lane))
#else
#define simde_vmull_laneq_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vmull_laneq_u32
#define vmull_laneq_u32(a, v, lane) simde_vmull_laneq_u32((a), (v), (lane))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MULL_LANE_H) */
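Because the portable forms above are macros over vdup_lane, the lane argument must be an integer constant expression; a minimal sketch:

#include <simde/arm/neon.h>

static simde_int32x4_t weight_by_lane0(simde_int16x4_t a, simde_int16x4_t w) {
  return simde_vmull_lane_s16(a, w, 0); /* lane index must be a constant */
}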

View File

@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_MULL_N_H)
#define SIMDE_ARM_NEON_MULL_N_H
#include "types.h"
#include "mul_n.h"
#include "movl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_n_s16(simde_int16x4_t a, int16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_n_s16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_n_s32(simde_vmovl_s16(a), b);
#else
simde_int32x4_private r_;
simde_int16x4_private a_ = simde_int16x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av;
SIMDE_CONVERT_VECTOR_(av, a_.values);
r_.values = av * b;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_n_s16
#define vmull_n_s16(a, b) simde_vmull_n_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_n_s32(simde_int32x2_t a, int32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_n_s32(a, b);
#else
simde_int64x2_private r_;
simde_int32x2_private a_ = simde_int32x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av;
SIMDE_CONVERT_VECTOR_(av, a_.values);
r_.values = av * b;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b);
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_n_s32
#define vmull_n_s32(a, b) simde_vmull_n_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_n_u16(simde_uint16x4_t a, uint16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_n_u16(a, b);
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
return simde_vmulq_n_u32(simde_vmovl_u16(a), b);
#else
simde_uint32x4_private r_;
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
__typeof__(r_.values) av;
SIMDE_CONVERT_VECTOR_(av, a_.values);
r_.values = av * b;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_n_u16
#define vmull_n_u16(a, b) simde_vmull_n_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmull_n_u32(simde_uint32x2_t a, uint32_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmull_n_u32(a, b);
#else
simde_uint64x2_private r_;
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
__typeof__(r_.values) av;
SIMDE_CONVERT_VECTOR_(av, a_.values);
r_.values = av * b;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b);
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmull_n_u32
#define vmull_n_u32(a, b) simde_vmull_n_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MULL_N_H) */
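A hedged sketch of the widening multiply-by-scalar in use (helper name invented):

#include <stdint.h>
#include <simde/arm/neon.h>

static simde_uint64x2_t scale_u32_pair(simde_uint32x2_t a, uint32_t k) {
  /* Each 32-bit lane times k, held exactly in 64 bits. */
  return simde_vmull_n_u32(a, k);
}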

View File

@@ -0,0 +1,426 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_MVN_H)
#define SIMDE_ARM_NEON_MVN_H
#include "combine.h"
#include "get_low.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmvnq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_s8(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
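/* The immediate 0x55 is the ternary-logic truth table for NOT of one
 * input; with all three operands equal it computes ~a in a single
 * instruction. */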
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
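/* cmpeq(x, x) yields all-ones, and andnot then gives ~a & ones = ~a;
 * SSE2 has no single NOT instruction. */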
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_s8
#define vmvnq_s8(a) simde_vmvnq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmvnq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_s16(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_s16
#define vmvnq_s16(a) simde_vmvnq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmvnq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_s32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_s32
#define vmvnq_s32(a) simde_vmvnq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmvnq_u8(simde_uint8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_u8(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_u8
#define vmvnq_u8(a) simde_vmvnq_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmvnq_u16(simde_uint16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_u16(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_u16
#define vmvnq_u16(a) simde_vmvnq_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmvnq_u32(simde_uint32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvnq_u32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_nor(a, a);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a);
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_not(a_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvnq_u32
#define vmvnq_u32(a) simde_vmvnq_u32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmvn_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_s8(a);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_s8
#define vmvn_s8(a) simde_vmvn_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmvn_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_s16(a);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_s16
#define vmvn_s16(a) simde_vmvn_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmvn_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_s32(a);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_s32
#define vmvn_s32(a) simde_vmvn_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmvn_u8(simde_uint8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_u8(a);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_u8
#define vmvn_u8(a) simde_vmvn_u8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmvn_u16(simde_uint16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_u16(a);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_u16
#define vmvn_u16(a) simde_vmvn_u16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmvn_u32(simde_uint32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmvn_u32(a);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = ~a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = ~(a_.values[i]);
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vmvn_u32
#define vmvn_u32(a) simde_vmvn_u32(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_MVN_H) */


@@ -0,0 +1,413 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_NEG_H)
#define SIMDE_ARM_NEON_NEG_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vnegd_s64(int64_t a) {
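/* Avoid the intrinsic on GCC older than 9.0, where vnegd_s64 is not usable */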
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0))
return vnegd_s64(a);
#else
return -a;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vnegd_s64
#define vnegd_s64(a) simde_vnegd_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vneg_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vneg_f32(a);
#else
simde_float32x2_private
r_,
a_ = simde_float32x2_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_float32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vneg_f32
#define vneg_f32(a) simde_vneg_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vneg_f64(simde_float64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vneg_f64(a);
#else
simde_float64x1_private
r_,
a_ = simde_float64x1_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_float64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vneg_f64
#define vneg_f64(a) simde_vneg_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vneg_s8(simde_int8x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vneg_s8(a);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vneg_s8
#define vneg_s8(a) simde_vneg_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vneg_s16(simde_int16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vneg_s16(a);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vneg_s16
#define vneg_s16(a) simde_vneg_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vneg_s32(simde_int32x2_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vneg_s32(a);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vneg_s32
#define vneg_s32(a) simde_vneg_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vneg_s64(simde_int64x1_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vneg_s64(a);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vnegd_s64(a_.values[i]);
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vneg_s64
#define vneg_s64(a) simde_vneg_s64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vnegq_f32(simde_float32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vnegq_f32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f32x4_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
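/* Negate by flipping the sign bit (0x80000000) of each 32-bit lane */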
r_.m128 = _mm_castsi128_ps(_mm_xor_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), _mm_castps_si128(a_.m128)));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_float32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vnegq_f32
#define vnegq_f32(a) simde_vnegq_f32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vnegq_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vnegq_f64(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_f64x2_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128d = _mm_castsi128_pd(_mm_xor_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), _mm_castpd_si128(a_.m128d)));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_float64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vnegq_f64
#define vnegq_f64(a) simde_vnegq_f64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vnegq_s8(simde_int8x16_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vnegq_s8(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i8x16_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
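/* 0 - a negates every lane in two's complement */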
r_.m128i = _mm_sub_epi8(_mm_setzero_si128(), a_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vnegq_s8
#define vnegq_s8(a) simde_vnegq_s8(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vnegq_s16(simde_int16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vnegq_s16(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i16x8_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_sub_epi16(_mm_setzero_si128(), a_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vnegq_s16
#define vnegq_s16(a) simde_vnegq_s16(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vnegq_s32(simde_int32x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vnegq_s32(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i32x4_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = -(a_.values[i]);
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vnegq_s32
#define vnegq_s32(a) simde_vnegq_s32(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vnegq_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vnegq_s64(a);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
return vec_neg(a);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a);
#if defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_i64x2_neg(a_.v128);
#elif defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.m128i);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = -a_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vnegd_s64(a_.values[i]);
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vnegq_s64
#define vnegq_s64(a) simde_vnegq_s64(a)
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_NEG_H) */


@@ -0,0 +1,505 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
#if !defined(SIMDE_ARM_NEON_ORN_H)
#define SIMDE_ARM_NEON_ORN_H
#include "orr.h"
#include "mvn.h"
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vorn_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_s8(a, b);
#else
simde_int8x8_private
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_s8
#define vorn_s8(a, b) simde_vorn_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vorn_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_s16(a, b);
#else
simde_int16x4_private
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_s16
#define vorn_s16(a, b) simde_vorn_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vorn_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_s32(a, b);
#else
simde_int32x2_private
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_s32
#define vorn_s32(a, b) simde_vorn_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vorn_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_s64(a, b);
#else
simde_int64x1_private
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_s64
#define vorn_s64(a, b) simde_vorn_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vorn_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_u8(a, b);
#else
simde_uint8x8_private
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_u8
#define vorn_u8(a, b) simde_vorn_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vorn_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_u16(a, b);
#else
simde_uint16x4_private
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_u16
#define vorn_u16(a, b) simde_vorn_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vorn_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_u32(a, b);
#else
simde_uint32x2_private
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_u32
#define vorn_u32(a, b) simde_vorn_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vorn_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorn_u64(a, b);
#else
simde_uint64x1_private
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b),
r_;
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorn_u64
#define vorn_u64(a, b) simde_vorn_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vornq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_int8x16_private
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
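/* Immediate 0xF3 encodes the boolean function A | ~B, so one ternary-logic op yields a | ~b */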
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_s8
#define vornq_s8(a, b) simde_vornq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vornq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_int16x8_private
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_s16
#define vornq_s16(a, b) simde_vornq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vornq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_int32x4_private
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_s32
#define vornq_s32(a, b) simde_vornq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vornq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_int64x2_private
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_s64
#define vornq_s64(a, b) simde_vornq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vornq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_uint8x16_private
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_u8
#define vornq_u8(a, b) simde_vornq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vornq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_uint16x8_private
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_u16
#define vornq_u16(a, b) simde_vornq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vornq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_uint32x4_private
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_u32
#define vornq_u32(a, b) simde_vornq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vornq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vornq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
return vec_orc(a, b);
#else
simde_uint64x2_private
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b),
r_;
#if defined(SIMDE_X86_AVX512VL_NATIVE)
r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | ~(b_.values);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | ~b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vornq_u64
#define vornq_u64(a, b) simde_vornq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ORN_H) */


@@ -0,0 +1,552 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#if !defined(SIMDE_ARM_NEON_ORR_H)
#define SIMDE_ARM_NEON_ORR_H
#include "types.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vorr_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_s8(a, b);
#else
simde_int8x8_private
r_,
a_ = simde_int8x8_to_private(a),
b_ = simde_int8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_s8
#define vorr_s8(a, b) simde_vorr_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vorr_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_s16(a, b);
#else
simde_int16x4_private
r_,
a_ = simde_int16x4_to_private(a),
b_ = simde_int16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_s16
#define vorr_s16(a, b) simde_vorr_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vorr_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_s32(a, b);
#else
simde_int32x2_private
r_,
a_ = simde_int32x2_to_private(a),
b_ = simde_int32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_s32
#define vorr_s32(a, b) simde_vorr_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vorr_s64(simde_int64x1_t a, simde_int64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_s64(a, b);
#else
simde_int64x1_private
r_,
a_ = simde_int64x1_to_private(a),
b_ = simde_int64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_s64
#define vorr_s64(a, b) simde_vorr_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vorr_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_u8(a, b);
#else
simde_uint8x8_private
r_,
a_ = simde_uint8x8_to_private(a),
b_ = simde_uint8x8_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint8x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_u8
#define vorr_u8(a, b) simde_vorr_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vorr_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_u16(a, b);
#else
simde_uint16x4_private
r_,
a_ = simde_uint16x4_to_private(a),
b_ = simde_uint16x4_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint16x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_u16
#define vorr_u16(a, b) simde_vorr_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vorr_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_u32(a, b);
#else
simde_uint32x2_private
r_,
a_ = simde_uint32x2_to_private(a),
b_ = simde_uint32x2_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint32x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_u32
#define vorr_u32(a, b) simde_vorr_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vorr_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorr_u64(a, b);
#else
simde_uint64x1_private
r_,
a_ = simde_uint64x1_to_private(a),
b_ = simde_uint64x1_to_private(b);
#if defined(SIMDE_X86_MMX_NATIVE)
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint64x1_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorr_u64
#define vorr_u64(a, b) simde_vorr_u64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vorrq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_s8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_int8x16_private
r_,
a_ = simde_int8x16_to_private(a),
b_ = simde_int8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_s8
#define vorrq_s8(a, b) simde_vorrq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vorrq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_s16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_s16
#define vorrq_s16(a, b) simde_vorrq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vorrq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_s32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_s32
#define vorrq_s32(a, b) simde_vorrq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vorrq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_s64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_or(a, b);
#else
simde_int64x2_private
r_,
a_ = simde_int64x2_to_private(a),
b_ = simde_int64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_int64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_s64
#define vorrq_s64(a, b) simde_vorrq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vorrq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_u8(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_uint8x16_private
r_,
a_ = simde_uint8x16_to_private(a),
b_ = simde_uint8x16_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint8x16_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_u8
#define vorrq_u8(a, b) simde_vorrq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vorrq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_u16(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_uint16x8_private
r_,
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint16x8_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_u16
#define vorrq_u16(a, b) simde_vorrq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vorrq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_u32(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_or(a, b);
#else
simde_uint32x4_private
r_,
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint32x4_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_u32
#define vorrq_u32(a, b) simde_vorrq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vorrq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vorrq_u64(a, b);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
return vec_or(a, b);
#else
simde_uint64x2_private
r_,
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);
#if defined(SIMDE_X86_SSE2_NATIVE)
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.values = a_.values | b_.values;
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = a_.values[i] | b_.values[i];
}
#endif
return simde_uint64x2_from_private(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vorrq_u64
#define vorrq_u64(a, b) simde_vorrq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_ORR_H) */


@@ -0,0 +1,211 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_PADAL_H)
#define SIMDE_ARM_NEON_PADAL_H
#include "types.h"
#include "add.h"
#include "paddl.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vpadal_s8(simde_int16x4_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_s8(a, b);
#else
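/* vpadal: widening pairwise add of b, accumulated into a */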
return simde_vadd_s16(a, simde_vpaddl_s8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_s8
#define vpadal_s8(a, b) simde_vpadal_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vpadal_s16(simde_int32x2_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_s16(a, b);
#else
return simde_vadd_s32(a, simde_vpaddl_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_s16
#define vpadal_s16(a, b) simde_vpadal_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vpadal_s32(simde_int64x1_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_s32(a, b);
#else
return simde_vadd_s64(a, simde_vpaddl_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_s32
#define vpadal_s32(a, b) simde_vpadal_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vpadal_u8(simde_uint16x4_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_u8(a, b);
#else
return simde_vadd_u16(a, simde_vpaddl_u8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_u8
#define vpadal_u8(a, b) simde_vpadal_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vpadal_u16(simde_uint32x2_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_u16(a, b);
#else
return simde_vadd_u32(a, simde_vpaddl_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_u16
#define vpadal_u16(a, b) simde_vpadal_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vpadal_u32(simde_uint64x1_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadal_u32(a, b);
#else
return simde_vadd_u64(a, simde_vpaddl_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadal_u32
#define vpadal_u32(a, b) simde_vpadal_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vpadalq_s8(simde_int16x8_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_s8(a, b);
#else
return simde_vaddq_s16(a, simde_vpaddlq_s8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_s8
#define vpadalq_s8(a, b) simde_vpadalq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vpadalq_s16(simde_int32x4_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_s16(a, b);
#else
return simde_vaddq_s32(a, simde_vpaddlq_s16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_s16
#define vpadalq_s16(a, b) simde_vpadalq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vpadalq_s32(simde_int64x2_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_s32(a, b);
#else
return simde_vaddq_s64(a, simde_vpaddlq_s32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_s32
#define vpadalq_s32(a, b) simde_vpadalq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vpadalq_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_u8(a, b);
#else
return simde_vaddq_u16(a, simde_vpaddlq_u8(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_u8
#define vpadalq_u8(a, b) simde_vpadalq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vpadalq_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_u16(a, b);
#else
return simde_vaddq_u32(a, simde_vpaddlq_u16(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_u16
#define vpadalq_u16(a, b) simde_vpadalq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vpadalq_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadalq_u32(a, b);
#else
return simde_vaddq_u64(a, simde_vpaddlq_u32(b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadalq_u32
#define vpadalq_u32(a, b) simde_vpadalq_u32((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_PADAL_H) */


@@ -0,0 +1,388 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020-2021 Evan Nemerson <evan@nemerson.com>
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
*/
#if !defined(SIMDE_ARM_NEON_PADD_H)
#define SIMDE_ARM_NEON_PADD_H
#include "add.h"
#include "uzp1.h"
#include "uzp2.h"
#include "types.h"
#include "get_lane.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vpaddd_s64(simde_int64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddd_s64(a);
#else
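/* Scalar fallback: add the two 64-bit lanes */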
return simde_vaddd_s64(simde_vgetq_lane_s64(a, 0), simde_vgetq_lane_s64(a, 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddd_s64
#define vpaddd_s64(a) simde_vpaddd_s64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vpaddd_u64(simde_uint64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddd_u64(a);
#else
return simde_vaddd_u64(simde_vgetq_lane_u64(a, 0), simde_vgetq_lane_u64(a, 1));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddd_u64
#define vpaddd_u64(a) simde_vpaddd_u64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vpaddd_f64(simde_float64x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddd_f64(a);
#else
simde_float64x2_private a_ = simde_float64x2_to_private(a);
return a_.values[0] + a_.values[1];
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddd_f64
#define vpaddd_f64(a) simde_vpaddd_f64((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vpadds_f32(simde_float32x2_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpadds_f32(a);
#else
simde_float32x2_private a_ = simde_float32x2_to_private(a);
return a_.values[0] + a_.values[1];
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpadds_f32
#define vpadds_f32(a) simde_vpadds_f32((a))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vpadd_f32(simde_float32x2_t a, simde_float32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0)
return vpadd_f32(a, b);
#else
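/* Pairwise add: sum the even-indexed lanes (uzp1) with the odd-indexed lanes (uzp2) */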
return simde_vadd_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_f32
#define vpadd_f32(a, b) simde_vpadd_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vpadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_s8(a, b);
#else
return simde_vadd_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_s8
#define vpadd_s8(a, b) simde_vpadd_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vpadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_s16(a, b);
#elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
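/* The SSSE3 horizontal add computes the same pairwise sums */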
return simde_int16x4_from_m64(_mm_hadd_pi16(simde_int16x4_to_m64(a), simde_int16x4_to_m64(b)));
#else
return simde_vadd_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_s16
#define vpadd_s16(a, b) simde_vpadd_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vpadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_s32(a, b);
#elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
return simde_int32x2_from_m64(_mm_hadd_pi32(simde_int32x2_to_m64(a), simde_int32x2_to_m64(b)));
#else
return simde_vadd_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_s32
#define vpadd_s32(a, b) simde_vpadd_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vpadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_u8(a, b);
#else
return simde_vadd_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_u8
#define vpadd_u8(a, b) simde_vpadd_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vpadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_u16(a, b);
#else
return simde_vadd_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_u16
#define vpadd_u16(a, b) simde_vpadd_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vpadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vpadd_u32(a, b);
#else
return simde_vadd_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
#undef vpadd_u32
#define vpadd_u32(a, b) simde_vpadd_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vpaddq_f32(simde_float32x4_t a, simde_float32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_f32(a, b);
#elif defined(SIMDE_X86_SSE3_NATIVE)
simde_float32x4_private
r_,
a_ = simde_float32x4_to_private(a),
b_ = simde_float32x4_to_private(b);
r_.m128 = _mm_hadd_ps(a_.m128, b_.m128);
return simde_float32x4_from_private(r_);
#else
return simde_vaddq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_f32
#define vpaddq_f32(a, b) simde_vpaddq_f32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vpaddq_f64(simde_float64x2_t a, simde_float64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_f64(a, b);
#elif defined(SIMDE_X86_SSE3_NATIVE)
simde_float64x2_private
r_,
a_ = simde_float64x2_to_private(a),
b_ = simde_float64x2_to_private(b);
r_.m128d = _mm_hadd_pd(a_.m128d, b_.m128d);
return simde_float64x2_from_private(r_);
#else
return simde_vaddq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_f64
#define vpaddq_f64(a, b) simde_vpaddq_f64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vpaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_s8(a, b);
#else
return simde_vaddq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_s8
#define vpaddq_s8(a, b) simde_vpaddq_s8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vpaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_s16(a, b);
#elif defined(SIMDE_X86_SSSE3_NATIVE)
simde_int16x8_private
r_,
a_ = simde_int16x8_to_private(a),
b_ = simde_int16x8_to_private(b);
r_.m128i = _mm_hadd_epi16(a_.m128i, b_.m128i);
return simde_int16x8_from_private(r_);
#else
return simde_vaddq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_s16
#define vpaddq_s16(a, b) simde_vpaddq_s16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vpaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_s32(a, b);
#elif defined(SIMDE_X86_SSSE3_NATIVE)
simde_int32x4_private
r_,
a_ = simde_int32x4_to_private(a),
b_ = simde_int32x4_to_private(b);
r_.m128i = _mm_hadd_epi32(a_.m128i, b_.m128i);
return simde_int32x4_from_private(r_);
#else
return simde_vaddq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_s32
#define vpaddq_s32(a, b) simde_vpaddq_s32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vpaddq_s64(simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_s64(a, b);
#else
return simde_vaddq_s64(simde_vuzp1q_s64(a, b), simde_vuzp2q_s64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_s64
#define vpaddq_s64(a, b) simde_vpaddq_s64((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vpaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_u8(a, b);
#else
return simde_vaddq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_u8
#define vpaddq_u8(a, b) simde_vpaddq_u8((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vpaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_u16(a, b);
#else
return simde_vaddq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_u16
#define vpaddq_u16(a, b) simde_vpaddq_u16((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vpaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_u32(a, b);
#else
return simde_vaddq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_u32
#define vpaddq_u32(a, b) simde_vpaddq_u32((a), (b))
#endif
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vpaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vpaddq_u64(a, b);
#else
return simde_vaddq_u64(simde_vuzp1q_u64(a, b), simde_vuzp2q_u64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vpaddq_u64
#define vpaddq_u64(a, b) simde_vpaddq_u64((a), (b))
#endif
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_ARM_NEON_PADD_H) */
