Add simde
This commit is contained in:
159
lib/simd_wrapper/simd_wrapper.h
Normal file
159
lib/simd_wrapper/simd_wrapper.h
Normal file
@@ -0,0 +1,159 @@
|
||||
/* ==========================================================================
|
||||
* Copyright (c) 2022 SuperTuxKart-Team
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to permit
|
||||
* persons to whom the Software is furnished to do so, subject to the
|
||||
* following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
||||
* NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* ==========================================================================
|
||||
*/
|
||||
#ifndef HEADER_SIMD_WRAPPER_HPP
|
||||
#define HEADER_SIMD_WRAPPER_HPP
|
||||
|
||||
#include <simde/simde-arch.h>
|
||||
#if defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_X86)
|
||||
// Native SSE
|
||||
#if __MMX__ || CPU_ENABLE_MMX
|
||||
#include <mmintrin.h>
|
||||
#define CPU_MMX_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 1 ) ) || CPU_ENABLE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#define CPU_SSE_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE2__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 2 ) ) || CPU_ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define CPU_SSE2_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#define CPU_SSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#define CPU_SSSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#define CPU_SSE4_1_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_2__ || CPU_ENABLE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#define CPU_SSE4_2_SUPPORT (1)
|
||||
#endif
|
||||
|
||||
#elif defined(SIMDE_ARCH_ARM_NEON)
|
||||
// We only enable compile time SSE* to Neon for now because it's easy to test
|
||||
// Enable up to SSE4.2 because after that (starting from AVX) it has few
|
||||
// native conversion, which will use the slower C99 fallback
|
||||
#define CPU_MMX_SUPPORT (1)
|
||||
#define CPU_SSE_SUPPORT (1)
|
||||
#define CPU_SSE2_SUPPORT (1)
|
||||
#define CPU_SSE3_SUPPORT (1)
|
||||
#define CPU_SSSE3_SUPPORT (1)
|
||||
#define CPU_SSE4_1_SUPPORT (1)
|
||||
#define CPU_SSE4_2_SUPPORT (1)
|
||||
|
||||
#if defined(_MSC_VER) && defined(__cplusplus)
|
||||
// Fix math related functions missing in msvc
|
||||
#include <cmath>
|
||||
#endif
|
||||
|
||||
#define SIMDE_ENABLE_NATIVE_ALIASES
|
||||
#include "simde/x86/sse4.2.h"
|
||||
#endif
|
||||
|
||||
#ifndef _MM_FROUND_TO_NEG_INF
|
||||
#define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
|
||||
#endif
|
||||
|
||||
#ifndef _MM_FROUND_NO_EXC
|
||||
#define _MM_FROUND_NO_EXC SIMDE_MM_FROUND_NO_EXC
|
||||
#endif
|
||||
|
||||
/* Map the rounding-mode setter to SIMDe's portable fallback when the
 * native <xmmintrin.h> macro is unavailable (e.g. on the NEON path).
 * The original line defined the macro as itself, which expands to an
 * unresolved identifier on non-x86 targets; SIMDe's replacement is
 * simde_MM_SET_ROUNDING_MODE, matching the SIMDE_MM_ROUND_* constants
 * mapped below. */
#ifndef _MM_SET_ROUNDING_MODE
#define _MM_SET_ROUNDING_MODE simde_MM_SET_ROUNDING_MODE
#endif
|
||||
|
||||
#ifndef _MM_ROUND_NEAREST
|
||||
#define _MM_ROUND_NEAREST SIMDE_MM_ROUND_NEAREST
|
||||
#endif
|
||||
|
||||
#ifndef _MM_ROUND_UP
|
||||
#define _MM_ROUND_UP SIMDE_MM_ROUND_UP
|
||||
#endif
|
||||
|
||||
#ifndef _MM_ROUND_DOWN
|
||||
#define _MM_ROUND_DOWN SIMDE_MM_ROUND_DOWN
|
||||
#endif
|
||||
|
||||
// Utilities for aligned allocation
|
||||
/* Allocates `bytes` bytes whose address is a multiple of `alignment`.
 *
 * The returned pointer must be released with simd_aligned_free(), never
 * plain free(): the address actually returned by malloc() is stashed in
 * a size_t book-keeping slot immediately before the returned block.
 *
 * Memory layout:  [padding][size_t book-keeping][aligned user data...]
 *
 * Returns NULL if `alignment` is zero, if the total size computation
 * would overflow size_t, or if malloc() fails.
 */
static inline void* simd_aligned_alloc(size_t alignment, size_t bytes)
{
    /* Worst case we need sizeof(size_t) for the book-keeping plus up to
     * `alignment` bytes of padding (the offset computed below is always
     * in [1, alignment]), so 2 * alignment of slack is always enough.
     * Reject alignment == 0 (the modulo below would divide by zero) and
     * any request whose total size would wrap around. */
    if (alignment == 0
        || alignment > ((size_t)-1 - sizeof(size_t)) / 2
        || bytes > (size_t)-1 - (2 * alignment) - sizeof(size_t))
    {
        return NULL;
    }

    const size_t total_size = bytes + (2 * alignment) + sizeof(size_t);

    /* Plain malloc provides the raw storage. */
    char* data = (char*)malloc(total_size);

    if (data)
    {
        /* Remember where the allocation really starts. */
        const void* const data_start = data;

        /* Reserve room for the book-keeping slot. */
        data += sizeof(size_t);

        /* Advance to the next aligned address.  If `data` is already
         * aligned this still moves a full `alignment` forward, which the
         * 2 * alignment padding accounts for. */
        const size_t offset = alignment - (((size_t)data) % alignment);
        data += offset;

        /* Write the book-keeping word just below the user pointer so
         * simd_aligned_free() can recover the original address. */
        size_t* book_keeping = (size_t*)(data - sizeof(size_t));
        *book_keeping = (size_t)data_start;
    }

    /* NULL when malloc failed, otherwise the aligned user pointer. */
    return data;
}
|
||||
|
||||
/* Releases memory obtained from simd_aligned_alloc().
 *
 * The size_t word stored immediately before `raw_data` holds the address
 * originally returned by malloc(); that address is what gets handed to
 * free().  Passing NULL is a no-op.  Passing any pointer that did not
 * come from simd_aligned_alloc() is undefined behaviour.
 *
 * Declared static inline (internal linkage) so this header-only helper
 * cannot trigger C99 inline-linkage "undefined reference" errors when
 * the header is included from multiple C translation units.
 */
static inline void simd_aligned_free(void* raw_data)
{
    if (raw_data)
    {
        char* data = (char*)raw_data;

        /* Step back onto the book-keeping word written by
         * simd_aligned_alloc()... */
        data -= sizeof(size_t);

        /* ...and recover the original malloc() address from it. */
        data = (char*)(*((size_t*)data));

        /* Free the underlying allocation. */
        free(data);
    }
}
|
||||
|
||||
#endif
|
||||
20
lib/simd_wrapper/simde/COPYING
Normal file
20
lib/simd_wrapper/simde/COPYING
Normal file
@@ -0,0 +1,20 @@
|
||||
Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
10
lib/simd_wrapper/simde/README.md
Normal file
10
lib/simd_wrapper/simde/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# SIMDe Without Test Cases
|
||||
|
||||
This repository contains only the core of
|
||||
[SIMDe](https://github.com/simd-everywhere/simde).
|
||||
It is generated automatically for every commit to master, and is
|
||||
intended to be used as a submodule in projects which don't want to
|
||||
include the (rather large) test cases.
|
||||
|
||||
All development work happens in the main repository, please do not
|
||||
file issues or create pull requests against this repository.
|
||||
210
lib/simd_wrapper/simde/arm/neon.h
Normal file
210
lib/simd_wrapper/simde/arm/neon.h
Normal file
@@ -0,0 +1,210 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_H)
|
||||
#define SIMDE_ARM_NEON_H
|
||||
|
||||
#include "neon/types.h"
|
||||
|
||||
#include "neon/aba.h"
|
||||
#include "neon/abd.h"
|
||||
#include "neon/abdl.h"
|
||||
#include "neon/abs.h"
|
||||
#include "neon/add.h"
|
||||
#include "neon/addhn.h"
|
||||
#include "neon/addl.h"
|
||||
#include "neon/addlv.h"
|
||||
#include "neon/addl_high.h"
|
||||
#include "neon/addv.h"
|
||||
#include "neon/addw.h"
|
||||
#include "neon/addw_high.h"
|
||||
#include "neon/and.h"
|
||||
#include "neon/bcax.h"
|
||||
#include "neon/bic.h"
|
||||
#include "neon/bsl.h"
|
||||
#include "neon/cage.h"
|
||||
#include "neon/cagt.h"
|
||||
#include "neon/ceq.h"
|
||||
#include "neon/ceqz.h"
|
||||
#include "neon/cge.h"
|
||||
#include "neon/cgez.h"
|
||||
#include "neon/cgt.h"
|
||||
#include "neon/cgtz.h"
|
||||
#include "neon/cle.h"
|
||||
#include "neon/clez.h"
|
||||
#include "neon/cls.h"
|
||||
#include "neon/clt.h"
|
||||
#include "neon/cltz.h"
|
||||
#include "neon/clz.h"
|
||||
#include "neon/cmla.h"
|
||||
#include "neon/cmla_rot90.h"
|
||||
#include "neon/cmla_rot180.h"
|
||||
#include "neon/cmla_rot270.h"
|
||||
#include "neon/cnt.h"
|
||||
#include "neon/cvt.h"
|
||||
#include "neon/combine.h"
|
||||
#include "neon/create.h"
|
||||
#include "neon/dot.h"
|
||||
#include "neon/dot_lane.h"
|
||||
#include "neon/dup_lane.h"
|
||||
#include "neon/dup_n.h"
|
||||
#include "neon/eor.h"
|
||||
#include "neon/ext.h"
|
||||
#include "neon/fma.h"
|
||||
#include "neon/fma_lane.h"
|
||||
#include "neon/fma_n.h"
|
||||
#include "neon/get_high.h"
|
||||
#include "neon/get_lane.h"
|
||||
#include "neon/get_low.h"
|
||||
#include "neon/hadd.h"
|
||||
#include "neon/hsub.h"
|
||||
#include "neon/ld1.h"
|
||||
#include "neon/ld1_dup.h"
|
||||
#include "neon/ld1_lane.h"
|
||||
#include "neon/ld2.h"
|
||||
#include "neon/ld3.h"
|
||||
#include "neon/ld4.h"
|
||||
#include "neon/ld4_lane.h"
|
||||
#include "neon/max.h"
|
||||
#include "neon/maxnm.h"
|
||||
#include "neon/maxv.h"
|
||||
#include "neon/min.h"
|
||||
#include "neon/minnm.h"
|
||||
#include "neon/minv.h"
|
||||
#include "neon/mla.h"
|
||||
#include "neon/mla_n.h"
|
||||
#include "neon/mlal.h"
|
||||
#include "neon/mlal_high.h"
|
||||
#include "neon/mlal_high_n.h"
|
||||
#include "neon/mlal_lane.h"
|
||||
#include "neon/mlal_n.h"
|
||||
#include "neon/mls.h"
|
||||
#include "neon/mls_n.h"
|
||||
#include "neon/mlsl.h"
|
||||
#include "neon/mlsl_high.h"
|
||||
#include "neon/mlsl_high_n.h"
|
||||
#include "neon/mlsl_lane.h"
|
||||
#include "neon/mlsl_n.h"
|
||||
#include "neon/movl.h"
|
||||
#include "neon/movl_high.h"
|
||||
#include "neon/movn.h"
|
||||
#include "neon/movn_high.h"
|
||||
#include "neon/mul.h"
|
||||
#include "neon/mul_lane.h"
|
||||
#include "neon/mul_n.h"
|
||||
#include "neon/mull.h"
|
||||
#include "neon/mull_high.h"
|
||||
#include "neon/mull_lane.h"
|
||||
#include "neon/mull_n.h"
|
||||
#include "neon/mvn.h"
|
||||
#include "neon/neg.h"
|
||||
#include "neon/orn.h"
|
||||
#include "neon/orr.h"
|
||||
#include "neon/padal.h"
|
||||
#include "neon/padd.h"
|
||||
#include "neon/paddl.h"
|
||||
#include "neon/pmax.h"
|
||||
#include "neon/pmin.h"
|
||||
#include "neon/qabs.h"
|
||||
#include "neon/qadd.h"
|
||||
#include "neon/qdmulh.h"
|
||||
#include "neon/qdmulh_lane.h"
|
||||
#include "neon/qdmulh_n.h"
|
||||
#include "neon/qdmull.h"
|
||||
#include "neon/qrdmulh.h"
|
||||
#include "neon/qrdmulh_lane.h"
|
||||
#include "neon/qrdmulh_n.h"
|
||||
#include "neon/qrshrn_n.h"
|
||||
#include "neon/qrshrun_n.h"
|
||||
#include "neon/qmovn.h"
|
||||
#include "neon/qmovun.h"
|
||||
#include "neon/qmovn_high.h"
|
||||
#include "neon/qneg.h"
|
||||
#include "neon/qsub.h"
|
||||
#include "neon/qshl.h"
|
||||
#include "neon/qshlu_n.h"
|
||||
#include "neon/qshrn_n.h"
|
||||
#include "neon/qshrun_n.h"
|
||||
#include "neon/qtbl.h"
|
||||
#include "neon/qtbx.h"
|
||||
#include "neon/rbit.h"
|
||||
#include "neon/recpe.h"
|
||||
#include "neon/recps.h"
|
||||
#include "neon/reinterpret.h"
|
||||
#include "neon/rev16.h"
|
||||
#include "neon/rev32.h"
|
||||
#include "neon/rev64.h"
|
||||
#include "neon/rhadd.h"
|
||||
#include "neon/rnd.h"
|
||||
#include "neon/rndm.h"
|
||||
#include "neon/rndi.h"
|
||||
#include "neon/rndn.h"
|
||||
#include "neon/rndp.h"
|
||||
#include "neon/rshl.h"
|
||||
#include "neon/rshr_n.h"
|
||||
#include "neon/rshrn_n.h"
|
||||
#include "neon/rsqrte.h"
|
||||
#include "neon/rsqrts.h"
|
||||
#include "neon/rsra_n.h"
|
||||
#include "neon/set_lane.h"
|
||||
#include "neon/shl.h"
|
||||
#include "neon/shl_n.h"
|
||||
#include "neon/shll_n.h"
|
||||
#include "neon/shr_n.h"
|
||||
#include "neon/shrn_n.h"
|
||||
#include "neon/sqadd.h"
|
||||
#include "neon/sra_n.h"
|
||||
#include "neon/sri_n.h"
|
||||
#include "neon/st1.h"
|
||||
#include "neon/st1_lane.h"
|
||||
#include "neon/st2.h"
|
||||
#include "neon/st2_lane.h"
|
||||
#include "neon/st3.h"
|
||||
#include "neon/st3_lane.h"
|
||||
#include "neon/st4.h"
|
||||
#include "neon/st4_lane.h"
|
||||
#include "neon/sub.h"
|
||||
#include "neon/subhn.h"
|
||||
#include "neon/subl.h"
|
||||
#include "neon/subl_high.h"
|
||||
#include "neon/subw.h"
|
||||
#include "neon/subw_high.h"
|
||||
#include "neon/tbl.h"
|
||||
#include "neon/tbx.h"
|
||||
#include "neon/trn.h"
|
||||
#include "neon/trn1.h"
|
||||
#include "neon/trn2.h"
|
||||
#include "neon/tst.h"
|
||||
#include "neon/uqadd.h"
|
||||
#include "neon/uzp.h"
|
||||
#include "neon/uzp1.h"
|
||||
#include "neon/uzp2.h"
|
||||
#include "neon/xar.h"
|
||||
#include "neon/zip.h"
|
||||
#include "neon/zip1.h"
|
||||
#include "neon/zip2.h"
|
||||
|
||||
#endif /* SIMDE_ARM_NEON_H */
|
||||
208
lib/simd_wrapper/simde/arm/neon/aba.h
Normal file
208
lib/simd_wrapper/simde/arm/neon/aba.h
Normal file
@@ -0,0 +1,208 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABA_H)
|
||||
#define SIMDE_ARM_NEON_ABA_H
|
||||
|
||||
#include "abd.h"
|
||||
#include "add.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s8(simde_vabd_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s8
|
||||
#define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s16(simde_vabd_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s16
|
||||
#define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s32(simde_vabd_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s32
|
||||
#define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u8(simde_vabd_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u8
|
||||
#define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u16(simde_vabd_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u16
|
||||
#define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u32(simde_vabd_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u32
|
||||
#define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s8(simde_vabdq_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s8
|
||||
#define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s16(simde_vabdq_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s16
|
||||
#define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s32(simde_vabdq_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s32
|
||||
#define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u8(simde_vabdq_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u8
|
||||
#define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vabaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u16(simde_vabdq_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u16
|
||||
#define vabaq_u16(a, b, c) simde_vabaq_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vabaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u32(simde_vabdq_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u32
|
||||
#define vabaq_u32(a, b, c) simde_vabaq_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABA_H) */
|
||||
489
lib/simd_wrapper/simde/arm/neon/abd.h
Normal file
489
lib/simd_wrapper/simde/arm/neon/abd.h
Normal file
@@ -0,0 +1,489 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABD_H)
|
||||
#define SIMDE_ARM_NEON_ABD_H
|
||||
|
||||
#include "abs.h"
|
||||
#include "subl.h"
|
||||
#include "movn.h"
|
||||
#include "movl.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vabds_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabds_f32(a, b);
|
||||
#else
|
||||
simde_float32_t r = a - b;
|
||||
return r < 0 ? -r : r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabds_f32
|
||||
#define vabds_f32(a, b) simde_vabds_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vabdd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabdd_f64(a, b);
|
||||
#else
|
||||
simde_float64_t r = a - b;
|
||||
return r < 0 ? -r : r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdd_f64
|
||||
#define vabdd_f64(a, b) simde_vabdd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_f32(a, b);
|
||||
#else
|
||||
return simde_vabs_f32(simde_vsub_f32(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_f32
|
||||
#define vabd_f32(a, b) simde_vabd_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabd_f64(a, b);
|
||||
#else
|
||||
return simde_vabs_f64(simde_vsub_f64(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_f64
|
||||
#define vabd_f64(a, b) simde_vabd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s8(a, b);
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE)
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
const __m64 m = _mm_cmpgt_pi8(b_.m64, a_.m64);
|
||||
r_.m64 =
|
||||
_mm_xor_si64(
|
||||
_mm_add_pi8(
|
||||
_mm_sub_pi8(a_.m64, b_.m64),
|
||||
m
|
||||
),
|
||||
m
|
||||
);
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s8
|
||||
#define vabd_s8(a, b) simde_vabd_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s16(a, b);
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
r_.m64 = _mm_sub_pi16(_mm_max_pi16(a_.m64, b_.m64), _mm_min_pi16(a_.m64, b_.m64));
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s16
|
||||
#define vabd_s16(a, b) simde_vabd_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s32(a, b);
|
||||
#else
|
||||
return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s32
|
||||
#define vabd_s32(a, b) simde_vabd_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u8(a, b);
|
||||
#else
|
||||
return simde_vmovn_u16(
|
||||
simde_vreinterpretq_u16_s16(
|
||||
simde_vabsq_s16(
|
||||
simde_vsubq_s16(
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u8
|
||||
#define vabd_u8(a, b) simde_vabd_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u16(a, b);
|
||||
#else
|
||||
return simde_vmovn_u32(
|
||||
simde_vreinterpretq_u32_s32(
|
||||
simde_vabsq_s32(
|
||||
simde_vsubq_s32(
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u16
|
||||
#define vabd_u16(a, b) simde_vabd_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u32(a, b);
|
||||
#else
|
||||
return simde_vmovn_u64(
|
||||
simde_vreinterpretq_u64_s64(
|
||||
simde_vabsq_s64(
|
||||
simde_vsubq_s64(
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u32
|
||||
#define vabd_u32(a, b) simde_vabd_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_f32(a, b);
|
||||
#else
|
||||
return simde_vabsq_f32(simde_vsubq_f32(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_f32
|
||||
#define vabdq_f32(a, b) simde_vabdq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabdq_f64(a, b);
|
||||
#else
|
||||
return simde_vabsq_f64(simde_vsubq_f64(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_f64
|
||||
#define vabdq_f64(a, b) simde_vabdq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
/* Absolute difference, int8x16: r[i] = |a[i] - b[i]| (result wraps mod 2^8,
 * matching NEON vabdq_s8).  Dispatches to NEON / AltiVec / z/Arch natively,
 * then to SSE4.1 / SSE2 / WASM on the portable path, with a scalar loop as
 * the final fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* max - min == |a - b| for signed lanes. */
      r_.m128i = _mm_sub_epi8(_mm_max_epi8(a_.m128i, b_.m128i), _mm_min_epi8(a_.m128i, b_.m128i));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* m is all-ones (-1) in lanes where b > a.  For those lanes
       * ((a-b) + m) ^ m == -(a-b) (two's complement: -x == (x-1) ^ -1),
       * so the expression yields |a - b| without SSE4.1's epi8 max/min. */
      const __m128i m = _mm_cmpgt_epi8(b_.m128i, a_.m128i);
      r_.m128i =
        _mm_xor_si128(
          _mm_add_epi8(
            _mm_sub_epi8(a_.m128i, b_.m128i),
            m
          ),
          m
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_sub(wasm_i8x16_max(a_.v128, b_.v128), wasm_i8x16_min(a_.v128, b_.v128));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen to int16_t so the subtraction cannot overflow before abs. */
        int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
        r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp);
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_s8
  #define vabdq_s8(a, b) simde_vabdq_s8((a), (b))
#endif
|
||||
|
||||
/* Absolute difference, int16x8: r[i] = |a[i] - b[i]| (wraps mod 2^16, like
 * NEON vabdq_s16).  SSE2 already has signed 16-bit max/min, so no SSE4.1
 * branch is needed here. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_s16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881658604 */
      r_.m128i = _mm_sub_epi16(_mm_max_epi16(a_.m128i, b_.m128i), _mm_min_epi16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_sub(wasm_i16x8_max(a_.v128, b_.v128), wasm_i16x8_min(a_.v128, b_.v128));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Branchy form; subtraction order chosen so the difference is
         * non-negative (int16 arithmetic wraps the same way NEON does). */
        r_.values[i] =
          (a_.values[i] < b_.values[i]) ?
            (b_.values[i] - a_.values[i]) :
            (a_.values[i] - b_.values[i]);
      }

    #endif
    return simde_int16x8_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_s16
  #define vabdq_s16(a, b) simde_vabdq_s16((a), (b))
#endif
|
||||
|
||||
/* Absolute difference, int32x4: r[i] = |a[i] - b[i]| (wraps mod 2^32, like
 * NEON vabdq_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_s32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Signed 32-bit max/min exist only from SSE4.1 on. */
      r_.m128i = _mm_sub_epi32(_mm_max_epi32(a_.m128i, b_.m128i), _mm_min_epi32(a_.m128i, b_.m128i));
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* m is all-ones where b > a; ((a-b) + m) ^ m conditionally negates
       * the difference in those lanes (two's complement negation). */
      const __m128i m = _mm_cmpgt_epi32(b_.m128i, a_.m128i);
      r_.m128i =
        _mm_xor_si128(
          _mm_add_epi32(
            _mm_sub_epi32(a_.m128i, b_.m128i),
            m
          ),
          m
        );
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen to int64_t so the subtraction cannot overflow before abs. */
        int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
        r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? -tmp : tmp);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_s32
  #define vabdq_s32(a, b) simde_vabdq_s32((a), (b))
#endif
|
||||
|
||||
/* Absolute difference, uint8x16: r[i] = |a[i] - b[i]|.
 * POWER9 has a dedicated vec_absd; SSE2's unsigned 8-bit max/min make the
 * x86 path straightforward. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
    return vec_absd(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_sub_epi8(_mm_max_epu8(a_.m128i, b_.m128i), _mm_min_epu8(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_sub(wasm_u8x16_max(a_.v128, b_.v128), wasm_u8x16_min(a_.v128, b_.v128));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Signed 16-bit temporary keeps the difference exact before abs. */
        int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
        r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? -tmp : tmp);
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_u8
  #define vabdq_u8(a, b) simde_vabdq_u8((a), (b))
#endif
|
||||
|
||||
/* Absolute difference, uint16x8: r[i] = |a[i] - b[i]|. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
    return vec_absd(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      r_.m128i = _mm_sub_epi16(_mm_max_epu16(a_.m128i, b_.m128i), _mm_min_epu16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_sub(wasm_u16x8_max(a_.v128, b_.v128), wasm_u16x8_min(a_.v128, b_.v128));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen to int32_t so the unsigned difference is exact before abs. */
        int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]);
        r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? -tmp : tmp);
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_u16
  #define vabdq_u16(a, b) simde_vabdq_u16((a), (b))
#endif
|
||||
|
||||
/* Absolute difference, uint32x4: r[i] = |a[i] - b[i]|. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
    return vec_absd(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_sub(vec_max(a, b), vec_min(a, b));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b) - vec_min(a, b);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      r_.m128i = _mm_sub_epi32(_mm_max_epu32(a_.m128i, b_.m128i), _mm_min_epu32(a_.m128i, b_.m128i));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen to int64_t so the unsigned difference is exact before abs. */
        int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
        r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? -tmp : tmp);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdq_u32
  #define vabdq_u32(a, b) simde_vabdq_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABD_H) */
|
||||
147
lib/simd_wrapper/simde/arm/neon/abdl.h
Normal file
147
lib/simd_wrapper/simde/arm/neon/abdl.h
Normal file
@@ -0,0 +1,147 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABDL_H)
|
||||
#define SIMDE_ARM_NEON_ABDL_H
|
||||
|
||||
#include "abs.h"
|
||||
#include "subl.h"
|
||||
#include "movl.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Widening absolute difference: r[i] = |a[i] - b[i]| as int16x8, from two
 * int8x8 inputs.  The widening subtract (vsubl) makes the difference exact,
 * so abs on the wider type gives the NEON result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabdl_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_s8(a, b);
  #else
    return simde_vabsq_s16(simde_vsubl_s8(a, b));
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_s8
  #define vabdl_s8(a, b) simde_vabdl_s8((a), (b))
#endif
|
||||
|
||||
/* Widening absolute difference: r[i] = |a[i] - b[i]| as int32x4, from two
 * int16x4 inputs (widening subtract, then abs — exact in int32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabdl_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_s16(a, b);
  #else
    return simde_vabsq_s32(simde_vsubl_s16(a, b));
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_s16
  #define vabdl_s16(a, b) simde_vabdl_s16((a), (b))
#endif
|
||||
|
||||
/* Widening absolute difference: r[i] = |a[i] - b[i]| as int64x2, from two
 * int32x2 inputs (widening subtract, then abs — exact in int64). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabdl_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_s32(a, b);
  #else
    return simde_vabsq_s64(simde_vsubl_s32(a, b));
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_s32
  #define vabdl_s32(a, b) simde_vabdl_s32((a), (b))
#endif
|
||||
|
||||
/* Widening absolute difference, unsigned: r[i] = |a[i] - b[i]| as uint16x8.
 * Zero-extend both inputs to 16 bits (vmovl), do the subtract/abs in the
 * signed domain where it is exact (uint8 differences fit in int16), then
 * reinterpret back to unsigned. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabdl_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_u8(a, b);
  #else
    return simde_vreinterpretq_u16_s16(
      simde_vabsq_s16(
        simde_vsubq_s16(
          simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
          simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))
        )
      )
    );
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_u8
  #define vabdl_u8(a, b) simde_vabdl_u8((a), (b))
#endif
|
||||
|
||||
/* Widening absolute difference, unsigned: r[i] = |a[i] - b[i]| as uint32x4.
 * Zero-extend to 32 bits, subtract/abs in the signed domain (exact for
 * uint16 differences), then reinterpret back to unsigned. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabdl_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_u16(a, b);
  #else
    return simde_vreinterpretq_u32_s32(
      simde_vabsq_s32(
        simde_vsubq_s32(
          simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
          simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))
        )
      )
    );
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_u16
  #define vabdl_u16(a, b) simde_vabdl_u16((a), (b))
#endif
|
||||
|
||||
/* Widening absolute difference, unsigned: r[i] = |a[i] - b[i]| as uint64x2.
 * Zero-extend to 64 bits, subtract/abs in the signed domain (exact for
 * uint32 differences), then reinterpret back to unsigned. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vabdl_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabdl_u32(a, b);
  #else
    return simde_vreinterpretq_u64_s64(
      simde_vabsq_s64(
        simde_vsubq_s64(
          simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
          simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))
        )
      )
    );
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabdl_u32
  #define vabdl_u32(a, b) simde_vabdl_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABDL_H) */
|
||||
431
lib/simd_wrapper/simde/arm/neon/abs.h
Normal file
431
lib/simd_wrapper/simde/arm/neon/abs.h
Normal file
@@ -0,0 +1,431 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABS_H)
|
||||
#define SIMDE_ARM_NEON_ABS_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Scalar absolute value of an int64 (AArch64 vabsd_s64).
 * The GCC version check works around compilers that lack the intrinsic
 * before GCC 9.1.
 * NOTE(review): for a == INT64_MIN the C fallback's negation overflows
 * (UB in ISO C); this mirrors upstream simde — confirm acceptable for
 * your targets. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vabsd_s64(int64_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0))
    return vabsd_s64(a);
  #else
    return a < 0 ? -a : a;
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsd_s64
  #define vabsd_s64(a) simde_vabsd_s64(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, float32x2 (64-bit vector): r[i] = |a[i]|. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vabs_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_f32(a);
  #else
    simde_float32x2_private
      r_,
      a_ = simde_float32x2_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
    }

    return simde_float32x2_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_f32
  #define vabs_f32(a) simde_vabs_f32(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, float64x1 (AArch64-only intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vabs_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabs_f64(a);
  #else
    simde_float64x1_private
      r_,
      a_ = simde_float64x1_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
    }

    return simde_float64x1_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabs_f64
  #define vabs_f64(a) simde_vabs_f64(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int8x8.  Like NEON vabs_s8, INT8_MIN maps to
 * itself (wraparound). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vabs_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s8(a);
  #else
    simde_int8x8_private
      r_,
      a_ = simde_int8x8_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_abs_pi8(a_.m64);
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      /* Branchless select: m is all-ones in negative lanes, so the result
       * is -a there and a elsewhere.  Guarded against GCC bug 100762. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int8x8_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s8
  #define vabs_s8(a) simde_vabs_s8(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int16x4 (INT16_MIN maps to itself, as on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vabs_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s16(a);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_abs_pi16(a_.m64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
      /* Branchless select via a sign mask; guarded against GCC bug 100761. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s16
  #define vabs_s16(a) simde_vabs_s16(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int32x2 (INT32_MIN maps to itself, as on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vabs_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s32(a);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_abs_pi32(a_.m64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
      /* Branchless select via a sign mask; guarded against GCC bug 100761. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s32
  #define vabs_s32(a) simde_vabs_s32(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int64x1 (AArch64-only intrinsic; INT64_MIN
 * maps to itself). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vabs_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabs_s64(a);
  #else
    simde_int64x1_private
      r_,
      a_ = simde_int64x1_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Branchless select: m is all-ones where a < 0, picking -a there. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int64x1_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s64
  #define vabs_s64(a) simde_vabs_s64(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, float32x4: r[i] = |a[i]|. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vabsq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_f32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_abs(a_.v128);
    #elif defined(SIMDE_X86_SSE_NATIVE)
      /* Clear only the sign bit: AND with 0x7FFFFFFF.  The constant is
       * moved into a float via memcpy to avoid strict-aliasing UB. */
      simde_float32 mask_;
      uint32_t u32_ = UINT32_C(0x7FFFFFFF);
      simde_memcpy(&mask_, &u32_, sizeof(u32_));
      r_.m128 = _mm_and_ps(_mm_set1_ps(mask_), a_.m128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_math_fabsf(a_.values[i]);
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_f32
  #define vabsq_f32(a) simde_vabsq_f32(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, float64x2 (AArch64-only intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vabsq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabsq_f64(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_abs(a);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Clear only the sign bit: AND with 0x7FFF...F, constant built via
       * memcpy to avoid strict-aliasing UB. */
      simde_float64 mask_;
      uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
      simde_memcpy(&mask_, &u64_, sizeof(u64_));
      r_.m128d = _mm_and_pd(_mm_set1_pd(mask_), a_.m128d);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_math_fabs(a_.values[i]);
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsq_f64
  #define vabsq_f64(a) simde_vabsq_f64(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int8x16 (INT8_MIN maps to itself, as on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabsq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s8(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i = _mm_abs_epi8(a_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* min_epu8(a, -a): viewed as unsigned, the smaller of a and -a is
       * |a| for every int8 value (INT8_MIN stays INT8_MIN). */
      r_.m128i = _mm_min_epu8(a_.m128i, _mm_sub_epi8(_mm_setzero_si128(), a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_abs(a_.v128);
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Branchless select via a sign mask. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s8
  #define vabsq_s8(a) simde_vabsq_s8(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int16x8 (INT16_MIN maps to itself, as on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabsq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s16(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i = _mm_abs_epi16(a_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* max(a, -a) == |a| for int16 (INT16_MIN stays INT16_MIN). */
      r_.m128i = _mm_max_epi16(a_.m128i, _mm_sub_epi16(_mm_setzero_si128(), a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_abs(a_.v128);
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Branchless select via a sign mask. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s16
  #define vabsq_s16(a) simde_vabsq_s16(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int32x4 (INT32_MIN maps to itself, as on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabsq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i = _mm_abs_epi32(a_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* m is all-ones in negative lanes; (a ^ m) - m is the classic
       * branchless conditional negate. */
      const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a_.m128i);
      r_.m128i = _mm_sub_epi32(_mm_xor_si128(a_.m128i, m), m);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_abs(a_.v128);
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Branchless select via a sign mask. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s32
  #define vabsq_s32(a) simde_vabsq_s32(a)
#endif
|
||||
|
||||
/* Lane-wise absolute value, int64x2.  vabsq_s64 is AArch64-only; on 32-bit
 * NEON it is emulated with a sign-mask select (vshrq_n_s64 broadcasts each
 * lane's sign bit, vbslq picks 0 - a for negative lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabsq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabsq_s64(a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)
    return vec_abs(a);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      r_.m128i = _mm_abs_epi64(a_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 has no 64-bit arithmetic shift: duplicate the high dword of
       * each lane (shuffle 0xF5) and srai by 31 to broadcast the sign bit
       * across the full 64 bits, then (a ^ m) - m conditionally negates. */
      const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a_.m128i, 0xF5), 31);
      r_.m128i = _mm_sub_epi64(_mm_xor_si128(a_.m128i, m), m);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i64x2_abs(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      /* Branchless select via a sign mask. */
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s64
  #define vabsq_s64(a) simde_vabsq_s64(a)
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABS_H) */
|
||||
744
lib/simd_wrapper/simde/arm/neon/add.h
Normal file
744
lib/simd_wrapper/simde/arm/neon/add.h
Normal file
@@ -0,0 +1,744 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADD_H)
|
||||
#define SIMDE_ARM_NEON_ADD_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Scalar half-precision add.  Without native FP16 support the operands are
 * converted to float32, added, and converted back (may double-round in rare
 * cases, mirroring upstream simde behavior). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16
simde_vaddh_f16(simde_float16 a, simde_float16 b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vaddh_f16(a, b);
  #else
    simde_float32 af = simde_float16_to_float32(a);
    simde_float32 bf = simde_float16_to_float32(b);
    return simde_float16_from_float32(af + bf);
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vaddh_f16
  #define vaddh_f16(a, b) simde_vaddh_f16((a), (b))
#endif
|
||||
|
||||
/* Scalar int64 add (AArch64 vaddd_s64).
 * NOTE(review): the C fallback's signed overflow is UB in ISO C, whereas
 * the NEON instruction wraps — mirrors upstream simde. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddd_s64(int64_t a, int64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddd_s64(a, b);
  #else
    return a + b;
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddd_s64
  #define vaddd_s64(a, b) simde_vaddd_s64((a), (b))
#endif
|
||||
|
||||
/* Scalar uint64 add (AArch64 vaddd_u64); unsigned addition wraps mod 2^64
 * in both the intrinsic and the C fallback. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddd_u64(uint64_t a, uint64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddd_u64(a, b);
  #else
    return a + b;
  #endif
}
/* Expose the un-prefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddd_u64
  #define vaddd_u64(a, b) simde_vaddd_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x4_t
|
||||
simde_vadd_f16(simde_float16x4_t a, simde_float16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vadd_f16(a, b);
|
||||
#else
|
||||
simde_float16x4_private
|
||||
r_,
|
||||
a_ = simde_float16x4_to_private(a),
|
||||
b_ = simde_float16x4_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
|
||||
return simde_float16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_f16
|
||||
#define vadd_f16(a, b) simde_vadd_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_f32(a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_f32
|
||||
#define vadd_f32(a, b) simde_vadd_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vadd_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vadd_f64(a, b);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_f64
|
||||
#define vadd_f64(a, b) simde_vadd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_add_pi8(a_.m64, b_.m64);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_s8
|
||||
#define vadd_s8(a, b) simde_vadd_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_add_pi16(a_.m64, b_.m64);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_s16
|
||||
#define vadd_s16(a, b) simde_vadd_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_add_pi32(a_.m64, b_.m64);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_s32
|
||||
#define vadd_s32(a, b) simde_vadd_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vadd_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_s64
|
||||
#define vadd_s64(a, b) simde_vadd_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_u8
|
||||
#define vadd_u8(a, b) simde_vadd_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_u16
|
||||
#define vadd_u16(a, b) simde_vadd_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_u32
|
||||
#define vadd_u32(a, b) simde_vadd_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vadd_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vadd_u64
|
||||
#define vadd_u64(a, b) simde_vadd_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x8_t
|
||||
simde_vaddq_f16(simde_float16x8_t a, simde_float16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vaddq_f16(a, b);
|
||||
#else
|
||||
simde_float16x8_private
|
||||
r_,
|
||||
a_ = simde_float16x8_to_private(a),
|
||||
b_ = simde_float16x8_to_private(b);
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
|
||||
return simde_float16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_f16
|
||||
#define vaddq_f16(a, b) simde_vaddq_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vaddq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_f32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_;
|
||||
a_ = a;
|
||||
b_ = b;
|
||||
r_ = vec_add(a_, b_);
|
||||
return r_;
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_,
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE_NATIVE)
|
||||
r_.m128 = _mm_add_ps(a_.m128, b_.m128);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_f32
|
||||
#define vaddq_f32(a, b) simde_vaddq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vaddq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddq_f64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_,
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128d = _mm_add_pd(a_.m128d, b_.m128d);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f64x2_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_f64
|
||||
#define vaddq_f64(a, b) simde_vaddq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_add_epi8(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_s8
|
||||
#define vaddq_s8(a, b) simde_vaddq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_add_epi16(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_s16
|
||||
#define vaddq_s16(a, b) simde_vaddq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_add_epi32(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i32x4_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_s32
|
||||
#define vaddq_s32(a, b) simde_vaddq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vaddq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_int64x2_private
|
||||
r_,
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_add_epi64(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i64x2_add(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_s64
|
||||
#define vaddq_s64(a, b) simde_vaddq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_u8
|
||||
#define vaddq_u8(a, b) simde_vaddq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_u16
|
||||
#define vaddq_u16(a, b) simde_vaddq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_u32
|
||||
#define vaddq_u32(a, b) simde_vaddq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddq_u64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_add(a, b);
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
r_,
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values + b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddq_u64
|
||||
#define vaddq_u64(a, b) simde_vaddq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADD_H) */
|
||||
211
lib/simd_wrapper/simde/arm/neon/addhn.h
Normal file
211
lib/simd_wrapper/simde/arm/neon/addhn.h
Normal file
@@ -0,0 +1,211 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDHN_H)
|
||||
#define SIMDE_ARM_NEON_ADDHN_H
|
||||
|
||||
#include "add.h"
|
||||
#include "shr_n.h"
|
||||
#include "movn.h"
|
||||
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vaddhn_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_s16(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_int8x8_private r_;
|
||||
simde_int8x16_private tmp_ =
|
||||
simde_int8x16_to_private(
|
||||
simde_vreinterpretq_s8_s16(
|
||||
simde_vaddq_s16(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14);
|
||||
#endif
|
||||
return simde_int8x8_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddq_s16(a, b), 8));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_s16
|
||||
#define vaddhn_s16(a, b) simde_vaddhn_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vaddhn_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_s32(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_int16x4_private r_;
|
||||
simde_int16x8_private tmp_ =
|
||||
simde_int16x8_to_private(
|
||||
simde_vreinterpretq_s16_s32(
|
||||
simde_vaddq_s32(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6);
|
||||
#endif
|
||||
return simde_int16x4_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddq_s32(a, b), 16));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_s32
|
||||
#define vaddhn_s32(a, b) simde_vaddhn_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vaddhn_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_s64(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_int32x2_private r_;
|
||||
simde_int32x4_private tmp_ =
|
||||
simde_int32x4_to_private(
|
||||
simde_vreinterpretq_s32_s64(
|
||||
simde_vaddq_s64(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2);
|
||||
#endif
|
||||
return simde_int32x2_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddq_s64(a, b), 32));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_s64
|
||||
#define vaddhn_s64(a, b) simde_vaddhn_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vaddhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_u16(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_uint8x8_private r_;
|
||||
simde_uint8x16_private tmp_ =
|
||||
simde_uint8x16_to_private(
|
||||
simde_vreinterpretq_u8_u16(
|
||||
simde_vaddq_u16(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14);
|
||||
#endif
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddq_u16(a, b), 8));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_u16
|
||||
#define vaddhn_u16(a, b) simde_vaddhn_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vaddhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_u32(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_uint16x4_private r_;
|
||||
simde_uint16x8_private tmp_ =
|
||||
simde_uint16x8_to_private(
|
||||
simde_vreinterpretq_u16_u32(
|
||||
simde_vaddq_u32(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6);
|
||||
#endif
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddq_u32(a, b), 16));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_u32
|
||||
#define vaddhn_u32(a, b) simde_vaddhn_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vaddhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddhn_u64(a, b);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint32x4_private tmp_ =
|
||||
simde_uint32x4_to_private(
|
||||
simde_vreinterpretq_u32_u64(
|
||||
simde_vaddq_u64(a, b)
|
||||
)
|
||||
);
|
||||
#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3);
|
||||
#else
|
||||
r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2);
|
||||
#endif
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddq_u64(a, b), 32));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddhn_u64
|
||||
#define vaddhn_u64(a, b) simde_vaddhn_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDHN_H) */
|
||||
127
lib/simd_wrapper/simde/arm/neon/addl.h
Normal file
127
lib/simd_wrapper/simde/arm/neon/addl.h
Normal file
@@ -0,0 +1,127 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDL_H)
|
||||
#define SIMDE_ARM_NEON_ADDL_H
|
||||
|
||||
#include "add.h"
|
||||
#include "movl.h"
|
||||
#include "movl_high.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vaddl_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_s8(a, b);
|
||||
#else
|
||||
return simde_vaddq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_s8
|
||||
#define vaddl_s8(a, b) simde_vaddl_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vaddl_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_s16(a, b);
|
||||
#else
|
||||
return simde_vaddq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_s16
|
||||
#define vaddl_s16(a, b) simde_vaddl_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vaddl_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_s32(a, b);
|
||||
#else
|
||||
return simde_vaddq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_s32
|
||||
#define vaddl_s32(a, b) simde_vaddl_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vaddl_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_u8(a, b);
|
||||
#else
|
||||
return simde_vaddq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_u8
|
||||
#define vaddl_u8(a, b) simde_vaddl_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vaddl_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_u16(a, b);
|
||||
#else
|
||||
return simde_vaddq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_u16
|
||||
#define vaddl_u16(a, b) simde_vaddl_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vaddl_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddl_u32(a, b);
|
||||
#else
|
||||
return simde_vaddq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_u32
|
||||
#define vaddl_u32(a, b) simde_vaddl_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDL_H) */
|
||||
127
lib/simd_wrapper/simde/arm/neon/addl_high.h
Normal file
127
lib/simd_wrapper/simde/arm/neon/addl_high.h
Normal file
@@ -0,0 +1,127 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDL_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_ADDL_HIGH_H
|
||||
|
||||
#include "add.h"
|
||||
#include "movl.h"
|
||||
#include "movl_high.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vaddl_high_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_s8(a, b);
|
||||
#else
|
||||
return simde_vaddq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_s8
|
||||
#define vaddl_high_s8(a, b) simde_vaddl_high_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vaddl_high_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_s16(a, b);
|
||||
#else
|
||||
return simde_vaddq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_s16
|
||||
#define vaddl_high_s16(a, b) simde_vaddl_high_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vaddl_high_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_s32(a, b);
|
||||
#else
|
||||
return simde_vaddq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_s32
|
||||
#define vaddl_high_s32(a, b) simde_vaddl_high_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vaddl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_u8(a, b);
|
||||
#else
|
||||
return simde_vaddq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_u8
|
||||
#define vaddl_high_u8(a, b) simde_vaddl_high_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vaddl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_u16(a, b);
|
||||
#else
|
||||
return simde_vaddq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_u16
|
||||
#define vaddl_high_u16(a, b) simde_vaddl_high_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vaddl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddl_high_u32(a, b);
|
||||
#else
|
||||
return simde_vaddq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddl_high_u32
|
||||
#define vaddl_high_u32(a, b) simde_vaddl_high_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) */
|
||||
317
lib/simd_wrapper/simde/arm/neon/addlv.h
Normal file
317
lib/simd_wrapper/simde/arm/neon/addlv.h
Normal file
@@ -0,0 +1,317 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDLV_H)
|
||||
#define SIMDE_ARM_NEON_ADDLV_H
|
||||
|
||||
#include "types.h"
|
||||
#include "movl.h"
|
||||
#include "addv.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vaddlv_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_s8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_s16(simde_vmovl_s8(a));
|
||||
#else
|
||||
simde_int8x8_private a_ = simde_int8x8_to_private(a);
|
||||
int16_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_s8
|
||||
#define vaddlv_s8(a) simde_vaddlv_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vaddlv_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_s16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_s32(simde_vmovl_s16(a));
|
||||
#else
|
||||
simde_int16x4_private a_ = simde_int16x4_to_private(a);
|
||||
int32_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_s16
|
||||
#define vaddlv_s16(a) simde_vaddlv_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vaddlv_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_s32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_s64(simde_vmovl_s32(a));
|
||||
#else
|
||||
simde_int32x2_private a_ = simde_int32x2_to_private(a);
|
||||
int64_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_s32
|
||||
#define vaddlv_s32(a) simde_vaddlv_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vaddlv_u8(simde_uint8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_u8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_u16(simde_vmovl_u8(a));
|
||||
#else
|
||||
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
|
||||
uint16_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_u8
|
||||
#define vaddlv_u8(a) simde_vaddlv_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vaddlv_u16(simde_uint16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_u16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_u32(simde_vmovl_u16(a));
|
||||
#else
|
||||
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
|
||||
uint32_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_u16
|
||||
#define vaddlv_u16(a) simde_vaddlv_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vaddlv_u32(simde_uint32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlv_u32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddvq_u64(simde_vmovl_u32(a));
|
||||
#else
|
||||
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
|
||||
uint64_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlv_u32
|
||||
#define vaddlv_u32(a) simde_vaddlv_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vaddlvq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_s8(a);
|
||||
#else
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
int16_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_s8
|
||||
#define vaddlvq_s8(a) simde_vaddlvq_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vaddlvq_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_s16(a);
|
||||
#else
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
int32_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_s16
|
||||
#define vaddlvq_s16(a) simde_vaddlvq_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vaddlvq_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_s32(a);
|
||||
#else
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
int64_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_s32
|
||||
#define vaddlvq_s32(a) simde_vaddlvq_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vaddlvq_u8(simde_uint8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_u8(a);
|
||||
#else
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
uint16_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_u8
|
||||
#define vaddlvq_u8(a) simde_vaddlvq_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vaddlvq_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_u16(a);
|
||||
#else
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
uint32_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_u16
|
||||
#define vaddlvq_u16(a) simde_vaddlvq_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vaddlvq_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddlvq_u32(a);
|
||||
#else
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
uint64_t r = 0;
|
||||
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddlvq_u32
|
||||
#define vaddlvq_u32(a) simde_vaddlvq_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */
|
||||
447
lib/simd_wrapper/simde/arm/neon/addv.h
Normal file
447
lib/simd_wrapper/simde/arm/neon/addv.h
Normal file
@@ -0,0 +1,447 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDV_H)
|
||||
#define SIMDE_ARM_NEON_ADDV_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vaddv_f32(simde_float32x2_t a) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_f32(a);
|
||||
#else
|
||||
simde_float32x2_private a_ = simde_float32x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_f32
|
||||
#define vaddv_f32(v) simde_vaddv_f32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vaddv_s8(simde_int8x8_t a) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_s8(a);
|
||||
#else
|
||||
simde_int8x8_private a_ = simde_int8x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_s8
|
||||
#define vaddv_s8(v) simde_vaddv_s8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vaddv_s16(simde_int16x4_t a) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_s16(a);
|
||||
#else
|
||||
simde_int16x4_private a_ = simde_int16x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_s16
|
||||
#define vaddv_s16(v) simde_vaddv_s16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vaddv_s32(simde_int32x2_t a) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_s32(a);
|
||||
#else
|
||||
simde_int32x2_private a_ = simde_int32x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_s32
|
||||
#define vaddv_s32(v) simde_vaddv_s32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vaddv_u8(simde_uint8x8_t a) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_u8
|
||||
#define vaddv_u8(v) simde_vaddv_u8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vaddv_u16(simde_uint16x4_t a) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_u16(a);
|
||||
#else
|
||||
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_u16
|
||||
#define vaddv_u16(v) simde_vaddv_u16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vaddv_u32(simde_uint32x2_t a) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddv_u32(a);
|
||||
#else
|
||||
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddv_u32
|
||||
#define vaddv_u32(v) simde_vaddv_u32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vaddvq_f32(simde_float32x4_t a) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_f32(a);
|
||||
#else
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_f32
|
||||
#define vaddvq_f32(v) simde_vaddvq_f32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vaddvq_f64(simde_float64x2_t a) {
|
||||
simde_float64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_f64(a);
|
||||
#else
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_f64
|
||||
#define vaddvq_f64(v) simde_vaddvq_f64(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vaddvq_s8(simde_int8x16_t a) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_s8(a);
|
||||
#else
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_s8
|
||||
#define vaddvq_s8(v) simde_vaddvq_s8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vaddvq_s16(simde_int16x8_t a) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_s16(a);
|
||||
#else
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_s16
|
||||
#define vaddvq_s16(v) simde_vaddvq_s16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vaddvq_s32(simde_int32x4_t a) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_s32(a);
|
||||
#else
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_s32
|
||||
#define vaddvq_s32(v) simde_vaddvq_s32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vaddvq_s64(simde_int64x2_t a) {
|
||||
int64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_s64(a);
|
||||
#else
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_s64
|
||||
#define vaddvq_s64(v) simde_vaddvq_s64(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vaddvq_u8(simde_uint8x16_t a) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_u8(a);
|
||||
#else
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_u8
|
||||
#define vaddvq_u8(v) simde_vaddvq_u8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vaddvq_u16(simde_uint16x8_t a) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_u16(a);
|
||||
#else
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_u16
|
||||
#define vaddvq_u16(v) simde_vaddvq_u16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vaddvq_u32(simde_uint32x4_t a) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_u32(a);
|
||||
#else
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_u32
|
||||
#define vaddvq_u32(v) simde_vaddvq_u32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vaddvq_u64(simde_uint64x2_t a) {
|
||||
uint64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vaddvq_u64(a);
|
||||
#else
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r += a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddvq_u64
|
||||
#define vaddvq_u64(v) simde_vaddvq_u64(v)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDV_H) */
|
||||
222
lib/simd_wrapper/simde/arm/neon/addw.h
Normal file
222
lib/simd_wrapper/simde/arm/neon/addw.h
Normal file
@@ -0,0 +1,222 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDW_H)
|
||||
#define SIMDE_ARM_NEON_ADDW_H
|
||||
|
||||
#include "types.h"
|
||||
#include "add.h"
|
||||
#include "movl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_s8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s16(a, simde_vmovl_s8(b));
|
||||
#else
|
||||
simde_int16x8_private r_;
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
simde_int8x8_private b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_s8
|
||||
#define vaddw_s8(a, b) simde_vaddw_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_s16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s32(a, simde_vmovl_s16(b));
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
simde_int16x4_private b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_s16
|
||||
#define vaddw_s16(a, b) simde_vaddw_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_s32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s64(a, simde_vmovl_s32(b));
|
||||
#else
|
||||
simde_int64x2_private r_;
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
simde_int32x2_private b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_s32
|
||||
#define vaddw_s32(a, b) simde_vaddw_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_u8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u16(a, simde_vmovl_u8(b));
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
simde_uint8x8_private b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_u8
|
||||
#define vaddw_u8(a, b) simde_vaddw_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_u16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u32(a, simde_vmovl_u16(b));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
simde_uint16x4_private b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_u16
|
||||
#define vaddw_u16(a, b) simde_vaddw_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaddw_u32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u64(a, simde_vmovl_u32(b));
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
simde_uint32x2_private b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values);
|
||||
r_.values += a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_u32
|
||||
#define vaddw_u32(a, b) simde_vaddw_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDW_H) */
|
||||
191
lib/simd_wrapper/simde/arm/neon/addw_high.h
Normal file
191
lib/simd_wrapper/simde/arm/neon/addw_high.h
Normal file
@@ -0,0 +1,191 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_ADDW_HIGH_H
|
||||
|
||||
#include "types.h"
|
||||
#include "movl_high.h"
|
||||
#include "add.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_s8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s16(a, simde_vmovl_high_s8(b));
|
||||
#else
|
||||
simde_int16x8_private r_;
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
simde_int8x16_private b_ = simde_int8x16_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_s8
|
||||
#define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_s16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s32(a, simde_vmovl_high_s16(b));
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
simde_int16x8_private b_ = simde_int16x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_s16
|
||||
#define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_s32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_s64(a, simde_vmovl_high_s32(b));
|
||||
#else
|
||||
simde_int64x2_private r_;
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
simde_int32x4_private b_ = simde_int32x4_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_s32
|
||||
#define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_u8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u16(a, simde_vmovl_high_u8(b));
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
simde_uint8x16_private b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_u8
|
||||
#define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_u16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u32(a, simde_vmovl_high_u16(b));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
simde_uint16x8_private b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_u16
|
||||
#define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vaddw_high_u32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return simde_vaddq_u64(a, simde_vmovl_high_u32(b));
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
simde_uint32x4_private b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
|
||||
}
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaddw_high_u32
|
||||
#define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */
|
||||
552
lib/simd_wrapper/simde/arm/neon/and.h
Normal file
552
lib/simd_wrapper/simde/arm/neon/and.h
Normal file
@@ -0,0 +1,552 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_AND_H)
|
||||
#define SIMDE_ARM_NEON_AND_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vand_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_s8
|
||||
#define vand_s8(a, b) simde_vand_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vand_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_s16
|
||||
#define vand_s16(a, b) simde_vand_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vand_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_s32
|
||||
#define vand_s32(a, b) simde_vand_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vand_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_s64
|
||||
#define vand_s64(a, b) simde_vand_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vand_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_u8
|
||||
#define vand_u8(a, b) simde_vand_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vand_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_u16
|
||||
#define vand_u16(a, b) simde_vand_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vand_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_u32
|
||||
#define vand_u32(a, b) simde_vand_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vand_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vand_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_and_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vand_u64
|
||||
#define vand_u64(a, b) simde_vand_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vandq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_s8
|
||||
#define vandq_s8(a, b) simde_vandq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vandq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_s16
|
||||
#define vandq_s16(a, b) simde_vandq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vandq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_s32
|
||||
#define vandq_s32(a, b) simde_vandq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vandq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_int64x2_private
|
||||
r_,
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_s64
|
||||
#define vandq_s64(a, b) simde_vandq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vandq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_u8
|
||||
#define vandq_u8(a, b) simde_vandq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vandq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_u16
|
||||
#define vandq_u16(a, b) simde_vandq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vandq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_u32
|
||||
#define vandq_u32(a, b) simde_vandq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vandq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vandq_u64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_and(a, b);
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
r_,
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_and_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_and(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values & b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vandq_u64
|
||||
#define vandq_u64(a, b) simde_vandq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_AND_H) */
|
||||
154
lib/simd_wrapper/simde/arm/neon/bcax.h
Normal file
154
lib/simd_wrapper/simde/arm/neon/bcax.h
Normal file
@@ -0,0 +1,154 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_BCAX_H)
|
||||
#define SIMDE_ARM_NEON_BCAX_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "eor.h"
|
||||
#include "bic.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vbcaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_u8(a, b, c);
|
||||
#else
|
||||
return simde_veorq_u8(a, simde_vbicq_u8(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_u8
|
||||
#define vbcaxq_u8(a, b, c) simde_vbcaxq_u8(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vbcaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_u16(a, b, c);
|
||||
#else
|
||||
return simde_veorq_u16(a, simde_vbicq_u16(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_u16
|
||||
#define vbcaxq_u16(a, b, c) simde_vbcaxq_u16(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vbcaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_u32(a, b, c);
|
||||
#else
|
||||
return simde_veorq_u32(a, simde_vbicq_u32(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_u32
|
||||
#define vbcaxq_u32(a, b, c) simde_vbcaxq_u32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vbcaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_u64(a, b, c);
|
||||
#else
|
||||
return simde_veorq_u64(a, simde_vbicq_u64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_u64
|
||||
#define vbcaxq_u64(a, b, c) simde_vbcaxq_u64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vbcaxq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_s8(a, b, c);
|
||||
#else
|
||||
return simde_veorq_s8(a, simde_vbicq_s8(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_s8
|
||||
#define vbcaxq_s8(a, b, c) simde_vbcaxq_s8(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vbcaxq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_s16(a, b, c);
|
||||
#else
|
||||
return simde_veorq_s16(a,simde_vbicq_s16(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_s16
|
||||
#define vbcaxq_s16(a, b, c) simde_vbcaxq_s16(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vbcaxq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_s32(a, b, c);
|
||||
#else
|
||||
return simde_veorq_s32(a, simde_vbicq_s32(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_s32
|
||||
#define vbcaxq_s32(a, b, c) simde_vbcaxq_s32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vbcaxq_s64(simde_int64x2_t a, simde_int64x2_t b, simde_int64x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3)
|
||||
return vbcaxq_s64(a, b, c);
|
||||
#else
|
||||
return simde_veorq_s64(a, simde_vbicq_s64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3))
|
||||
#undef vbcaxq_s64
|
||||
#define vbcaxq_s64(a, b, c) simde_vbcaxq_s64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_BCAX_H) */
|
||||
504
lib/simd_wrapper/simde/arm/neon/bic.h
Normal file
504
lib/simd_wrapper/simde/arm/neon/bic.h
Normal file
@@ -0,0 +1,504 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_BIC_H)
|
||||
#define SIMDE_ARM_NEON_BIC_H
|
||||
|
||||
#include "dup_n.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_s8
|
||||
#define vbic_s8(a, b) simde_vbic_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_s16
|
||||
#define vbic_s16(a, b) simde_vbic_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_s32
|
||||
#define vbic_s32(a, b) simde_vbic_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_s64
|
||||
#define vbic_s64(a, b) simde_vbic_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_u8
|
||||
#define vbic_u8(a, b) simde_vbic_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_u16
|
||||
#define vbic_u16(a, b) simde_vbic_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_u32
|
||||
#define vbic_u32(a, b) simde_vbic_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbic_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_andnot_si64(b_.m64, a_.m64);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbic_u64
|
||||
#define vbic_u64(a, b) simde_vbic_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_s8
|
||||
#define vbicq_s8(a, b) simde_vbicq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_s16
|
||||
#define vbicq_s16(a, b) simde_vbicq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_s32
|
||||
#define vbicq_s32(a, b) simde_vbicq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_int64x2_private
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_s64
|
||||
#define vbicq_s64(a, b) simde_vbicq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_u8
|
||||
#define vbicq_u8(a, b) simde_vbicq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_u16
|
||||
#define vbicq_u16(a, b) simde_vbicq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_u32
|
||||
#define vbicq_u32(a, b) simde_vbicq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbicq_u64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_andc(a, b);
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_andnot(a_.v128, b_.v128);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] & ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbicq_u64
|
||||
#define vbicq_u64(a, b) simde_vbicq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_BIC_H) */
|
||||
761
lib/simd_wrapper/simde/arm/neon/bsl.h
Normal file
761
lib/simd_wrapper/simde/arm/neon/bsl.h
Normal file
@@ -0,0 +1,761 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_BSL_H)
|
||||
#define SIMDE_ARM_NEON_BSL_H
|
||||
|
||||
#include "types.h"
|
||||
#include "reinterpret.h"
|
||||
#include "and.h"
|
||||
#include "eor.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x4_t
|
||||
simde_vbsl_f16(simde_uint16x4_t a, simde_float16x4_t b, simde_float16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vbsl_f16(a, b, c);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(b)),
|
||||
c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(c));
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_vreinterpret_f16_u16(simde_uint16x4_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbsl_f16
|
||||
#define vbsl_f16(a, b, c) simde_vbsl_f16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vbsl_f32(simde_uint32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbsl_f32(a, b, c);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(b)),
|
||||
c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(c));
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_vreinterpret_f32_u32(simde_uint32x2_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbsl_f32
|
||||
#define vbsl_f32(a, b, c) simde_vbsl_f32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vbsl_f64(simde_uint64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vbsl_f64(a, b, c);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(b)),
|
||||
c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(c));
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_vreinterpret_f64_u64(simde_uint64x1_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbsl_f64
|
||||
#define vbsl_f64(a, b, c) simde_vbsl_f64((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vbsl_s8(simde_uint8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vbsl_s8(a, b, c);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(b)),
|
||||
c_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(c));
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_vreinterpret_s8_u8(simde_uint8x8_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vbsl_s8
|
||||
#define vbsl_s8(a, b, c) simde_vbsl_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vbsl_s16(simde_uint16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
  /* Bitwise select (NEON vbsl_s16): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_s16(a, b, c);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(b)),
      c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpret_s16_u16(simde_uint16x4_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_s16
  #define vbsl_s16(a, b, c) simde_vbsl_s16((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vbsl_s32(simde_uint32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
  /* Bitwise select (NEON vbsl_s32): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_s32(a, b, c);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(b)),
      c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpret_s32_u32(simde_uint32x2_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_s32
  #define vbsl_s32(a, b, c) simde_vbsl_s32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vbsl_s64(simde_uint64x1_t a, simde_int64x1_t b, simde_int64x1_t c) {
  /* Bitwise select (NEON vbsl_s64): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_s64(a, b, c);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(b)),
      c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpret_s64_u64(simde_uint64x1_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_s64
  #define vbsl_s64(a, b, c) simde_vbsl_s64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
  /* Bitwise select (NEON vbsl_u8); unsigned variant, so no reinterpret
   * round-trip is needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_u8(a, b, c);
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b),
      c_ = simde_uint8x8_to_private(c);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_u8
  #define vbsl_u8(a, b, c) simde_vbsl_u8((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vbsl_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
  /* Bitwise select (NEON vbsl_u16); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_u16(a, b, c);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b),
      c_ = simde_uint16x4_to_private(c);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_u16
  #define vbsl_u16(a, b, c) simde_vbsl_u16((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vbsl_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
  /* Bitwise select (NEON vbsl_u32); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_u32(a, b, c);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b),
      c_ = simde_uint32x2_to_private(c);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_u32
  #define vbsl_u32(a, b, c) simde_vbsl_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vbsl_u64(simde_uint64x1_t a, simde_uint64x1_t b, simde_uint64x1_t c) {
  /* Bitwise select (NEON vbsl_u64); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbsl_u64(a, b, c);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b),
      c_ = simde_uint64x1_to_private(c);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbsl_u64
  #define vbsl_u64(a, b, c) simde_vbsl_u64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vbslq_f16(simde_uint16x8_t a, simde_float16x8_t b, simde_float16x8_t c) {
  /* Bitwise select (NEON vbslq_f16): the f16 lanes are selected through
   * their u16 bit patterns, so no FP arithmetic (or NaN handling) occurs. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vbslq_f16(a, b, c);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(b)),
      c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_f16_u16(simde_uint16x8_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_f16
  #define vbslq_f16(a, b, c) simde_vbslq_f16((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vbslq_f32(simde_uint32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
  /* Bitwise select (NEON vbslq_f32): f32 lanes selected through their u32
   * bit patterns. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_f32(a, b, c);
  /* NOTE(review): this function gates z/Arch on ZVECTOR_14 while the sibling
   * vbslq_* functions use ZVECTOR_13 — confirm the difference is intentional. */
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(b)),
      c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca is the ternary-logic truth table for (a & b) | (~a & c). */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_f32_u32(simde_uint32x4_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_f32
  #define vbslq_f32(a, b, c) simde_vbslq_f32((a), (b), (c))
#endif
|
||||
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vbslq_f64(simde_uint64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
  /* Bitwise select (NEON vbslq_f64, AArch64-only natively): f64 lanes are
   * selected through their u64 bit patterns. */
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vbslq_f64(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(b)),
      c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca is the ternary-logic truth table for (a & b) | (~a & c). */
      r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_f64_u64(simde_uint64x2_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vbslq_f64
  #define vbslq_f64(a, b, c) simde_vbslq_f64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vbslq_s8(simde_uint8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
  /* Bitwise select (NEON vbslq_s8): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_s8(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(b)),
      c_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Element width is irrelevant for a pure bitwise op, so epi32 is fine.
       * 0xca encodes (a & b) | (~a & c). */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_s8_u8(simde_uint8x16_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_s8
  #define vbslq_s8(a, b, c) simde_vbslq_s8((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vbslq_s16(simde_uint16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
  /* Bitwise select (NEON vbslq_s16): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_s16(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(b)),
      c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c); element width is irrelevant. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_s16_u16(simde_uint16x8_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_s16
  #define vbslq_s16(a, b, c) simde_vbslq_s16((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vbslq_s32(simde_uint32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
  /* Bitwise select (NEON vbslq_s32): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_s32(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(b)),
      c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c). */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_s32_u32(simde_uint32x4_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_s32
  #define vbslq_s32(a, b, c) simde_vbslq_s32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vbslq_s64(simde_uint64x2_t a, simde_int64x2_t b, simde_int64x2_t c) {
  /* Bitwise select (NEON vbslq_s64): bits of b where mask a is 1, bits of c
   * where it is 0. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_s64(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    /* Delegate to the 32-bit select: a pure bitwise op is width-agnostic. */
    return
      simde_vreinterpretq_s64_s32(
        simde_vbslq_s32(
          simde_vreinterpretq_u32_u64(a),
          simde_vreinterpretq_s32_s64(b),
          simde_vreinterpretq_s32_s64(c)
        )
      );
  /* NOTE(review): this branch appears unreachable — the previous one already
   * matches SIMDE_ZARCH_ZVECTOR_13_NATIVE, and P8 platforms presumably also
   * define P6. Confirm whether it is dead code. */
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(
      HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), c),
      HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b),
      HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a));
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(b)),
      c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(c));

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c); element width is irrelevant. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_vreinterpretq_s64_u64(simde_uint64x2_from_private(r_));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_s64
  #define vbslq_s64(a, b, c) simde_vbslq_s64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vbslq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
  /* Bitwise select (NEON vbslq_u8); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_u8(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b),
      c_ = simde_uint8x16_to_private(c);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c); element width is irrelevant. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_u8
  #define vbslq_u8(a, b, c) simde_vbslq_u8((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vbslq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
  /* Bitwise select (NEON vbslq_u16); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_u16(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b),
      c_ = simde_uint16x8_to_private(c);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c); element width is irrelevant. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_u16
  #define vbslq_u16(a, b, c) simde_vbslq_u16((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vbslq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
  /* Bitwise select (NEON vbslq_u32); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_u32(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_sel(c, b, a);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b),
      c_ = simde_uint32x4_to_private(c);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c). */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_u32
  #define vbslq_u32(a, b, c) simde_vbslq_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vbslq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) {
  /* Bitwise select (NEON vbslq_u64); unsigned variant, no reinterpret needed. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vbslq_u64(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    /* Delegate to the 32-bit select: a pure bitwise op is width-agnostic. */
    return
      simde_vreinterpretq_u64_u32(
        simde_vbslq_u32(
          simde_vreinterpretq_u32_u64(a),
          simde_vreinterpretq_u32_u64(b),
          simde_vreinterpretq_u32_u64(c)
        )
      );
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b),
      c_ = simde_uint64x2_to_private(c);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128);
    #elif defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 0xca encodes (a & b) | (~a & c); element width is irrelevant. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branch-free form of (b & a) | (c & ~a). */
      r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vbslq_u64
  #define vbslq_u64(a, b, c) simde_vbslq_u64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_BSL_H) */
|
||||
189
lib/simd_wrapper/simde/arm/neon/cage.h
Normal file
189
lib/simd_wrapper/simde/arm/neon/cage.h
Normal file
@@ -0,0 +1,189 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CAGE_H)
|
||||
#define SIMDE_ARM_NEON_CAGE_H
|
||||
|
||||
#include "types.h"
|
||||
#include "abs.h"
|
||||
#include "cge.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vcageh_f16(simde_float16_t a, simde_float16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vcageh_f16(a, b);
|
||||
#else
|
||||
simde_float32_t a_ = simde_float16_to_float32(a);
|
||||
simde_float32_t b_ = simde_float16_to_float32(b);
|
||||
return (simde_math_fabsf(a_) >= simde_math_fabsf(b_)) ? UINT16_MAX : UINT16_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcageh_f16
|
||||
#define vcageh_f16(a, b) simde_vcageh_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcages_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcages_f32(a, b);
|
||||
#else
|
||||
return (simde_math_fabsf(a) >= simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcages_f32
|
||||
#define vcages_f32(a, b) simde_vcages_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcaged_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcaged_f64(a, b);
|
||||
#else
|
||||
return (simde_math_fabs(a) >= simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcaged_f64
|
||||
#define vcaged_f64(a, b) simde_vcaged_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcage_f16(simde_float16x4_t a, simde_float16x4_t b) {
  /* Lane-wise absolute compare >= for 4 x f16: each result lane is
   * all-ones where |a| >= |b|, zero otherwise (NEON vcage_f16). */
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcage_f16(a, b);
  #else
    simde_float16x4_private
      a_ = simde_float16x4_to_private(a),
      b_ = simde_float16x4_to_private(b);
    simde_uint16x4_private r_;

    /* Delegate each lane to the scalar helper. */
    SIMDE_VECTORIZE
    for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]);
    }

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vcage_f16
  #define vcage_f16(a, b) simde_vcage_f16((a), (b))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcage_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcage_f32(a, b);
|
||||
#else
|
||||
return simde_vcge_f32(simde_vabs_f32(a), simde_vabs_f32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcage_f32
|
||||
#define vcage_f32(a, b) simde_vcage_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcage_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcage_f64(a, b);
|
||||
#else
|
||||
return simde_vcge_f64(simde_vabs_f64(a), simde_vabs_f64(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcage_f64
|
||||
#define vcage_f64(a, b) simde_vcage_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcageq_f16(simde_float16x8_t a, simde_float16x8_t b) {
  /* Lane-wise absolute compare >= for 8 x f16: each result lane is
   * all-ones where |a| >= |b|, zero otherwise (NEON vcageq_f16). */
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcageq_f16(a, b);
  #else
    simde_float16x8_private
      a_ = simde_float16x8_to_private(a),
      b_ = simde_float16x8_to_private(b);
    simde_uint16x8_private r_;

    /* Delegate each lane to the scalar helper. */
    SIMDE_VECTORIZE
    for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]);
    }
    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vcageq_f16
  #define vcageq_f16(a, b) simde_vcageq_f16((a), (b))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcageq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcageq_f32(a, b);
|
||||
#else
|
||||
return simde_vcgeq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcageq_f32
|
||||
#define vcageq_f32(a, b) simde_vcageq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcageq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcageq_f64(a, b);
|
||||
#else
|
||||
return simde_vcgeq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcageq_f64
|
||||
#define vcageq_f64(a, b) simde_vcageq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CAGE_H) */
|
||||
189
lib/simd_wrapper/simde/arm/neon/cagt.h
Normal file
189
lib/simd_wrapper/simde/arm/neon/cagt.h
Normal file
@@ -0,0 +1,189 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CAGT_H)
|
||||
#define SIMDE_ARM_NEON_CAGT_H
|
||||
|
||||
#include "types.h"
|
||||
#include "abs.h"
|
||||
#include "cgt.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vcagth_f16(simde_float16_t a, simde_float16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vcagth_f16(a, b);
|
||||
#else
|
||||
simde_float32_t
|
||||
af = simde_float16_to_float32(a),
|
||||
bf = simde_float16_to_float32(b);
|
||||
return (simde_math_fabsf(af) > simde_math_fabsf(bf)) ? UINT16_MAX : UINT16_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagth_f16
|
||||
#define vcagth_f16(a, b) simde_vcagth_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcagts_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcagts_f32(a, b);
|
||||
#else
|
||||
return (simde_math_fabsf(a) > simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagts_f32
|
||||
#define vcagts_f32(a, b) simde_vcagts_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcagtd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcagtd_f64(a, b);
|
||||
#else
|
||||
return (simde_math_fabs(a) > simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagtd_f64
|
||||
#define vcagtd_f64(a, b) simde_vcagtd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vcagt_f16(simde_float16x4_t a, simde_float16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vcagt_f16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_float16x4_private
|
||||
a_ = simde_float16x4_to_private(a),
|
||||
b_ = simde_float16x4_to_private(b);
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagt_f16
|
||||
#define vcagt_f16(a, b) simde_vcagt_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcagt_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcagt_f32(a, b);
|
||||
#else
|
||||
return simde_vcgt_f32(simde_vabs_f32(a), simde_vabs_f32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagt_f32
|
||||
#define vcagt_f32(a, b) simde_vcagt_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcagt_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcagt_f64(a, b);
|
||||
#else
|
||||
return simde_vcgt_f64(simde_vabs_f64(a), simde_vabs_f64(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagt_f64
|
||||
#define vcagt_f64(a, b) simde_vcagt_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vcagtq_f16(simde_float16x8_t a, simde_float16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vcagtq_f16(a, b);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_float16x8_private
|
||||
a_ = simde_float16x8_to_private(a),
|
||||
b_ = simde_float16x8_to_private(b);
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagtq_f16
|
||||
#define vcagtq_f16(a, b) simde_vcagtq_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcagtq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcagtq_f32(a, b);
|
||||
#else
|
||||
return simde_vcgtq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagtq_f32
|
||||
#define vcagtq_f32(a, b) simde_vcagtq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcagtq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcagtq_f64(a, b);
|
||||
#else
|
||||
return simde_vcgtq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcagtq_f64
|
||||
#define vcagtq_f64(a, b) simde_vcagtq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CAGT_H) */
|
||||
772
lib/simd_wrapper/simde/arm/neon/ceq.h
Normal file
772
lib/simd_wrapper/simde/arm/neon/ceq.h
Normal file
@@ -0,0 +1,772 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CEQ_H)
|
||||
#define SIMDE_ARM_NEON_CEQ_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vceqh_f16(simde_float16_t a, simde_float16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vceqh_f16(a, b);
|
||||
#else
|
||||
return (simde_float16_to_float32(a) == simde_float16_to_float32(b)) ? UINT16_MAX : UINT16_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqh_f16
|
||||
#define vceqh_f16(a, b) simde_vceqh_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vceqs_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceqs_f32(a, b);
|
||||
#else
|
||||
return (a == b) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqs_f32
|
||||
#define vceqs_f32(a, b) simde_vceqs_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vceqd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceqd_f64(a, b);
|
||||
#else
|
||||
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqd_f64
|
||||
#define vceqd_f64(a, b) simde_vceqd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vceqd_s64(int64_t a, int64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vceqd_s64(a, b));
|
||||
#else
|
||||
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqd_s64
|
||||
#define vceqd_s64(a, b) simde_vceqd_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vceqd_u64(uint64_t a, uint64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceqd_u64(a, b);
|
||||
#else
|
||||
return (a == b) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqd_u64
|
||||
#define vceqd_u64(a, b) simde_vceqd_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vceq_f16(simde_float16x4_t a, simde_float16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vceq_f16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_float16x4_private
|
||||
a_ = simde_float16x4_to_private(a),
|
||||
b_ = simde_float16x4_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_f16
|
||||
#define vceq_f16(a, b) simde_vceq_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vceq_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_f32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_float32x2_private
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_f32
|
||||
#define vceq_f32(a, b) simde_vceq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vceq_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceq_f64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_float64x1_private
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_f64
|
||||
#define vceq_f64(a, b) simde_vceq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vceq_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_s8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpeq_pi8(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_s8
|
||||
#define vceq_s8(a, b) simde_vceq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vceq_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_s16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_int16x4_private
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpeq_pi16(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_s16
|
||||
#define vceq_s16(a, b) simde_vceq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vceq_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_s32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_int32x2_private
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpeq_pi32(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_s32
|
||||
#define vceq_s32(a, b) simde_vceq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vceq_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceq_s64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_int64x1_private
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_s64
|
||||
#define vceq_s64(a, b) simde_vceq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vceq_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_u8
|
||||
#define vceq_u8(a, b) simde_vceq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vceq_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_uint16x4_private
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_u16
|
||||
#define vceq_u16(a, b) simde_vceq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vceq_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceq_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint32x2_private
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_u32
|
||||
#define vceq_u32(a, b) simde_vceq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vceq_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceq_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_uint64x1_private
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceq_u64
|
||||
#define vceq_u64(a, b) simde_vceq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vceqq_f16(simde_float16x8_t a, simde_float16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vceqq_f16(a, b);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_float16x8_private
|
||||
a_ = simde_float16x8_to_private(a),
|
||||
b_ = simde_float16x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_f16
|
||||
#define vceqq_f16(a, b) simde_vceqq_f16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vceqq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_f32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_float32x4_private
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castps_si128(_mm_cmpeq_ps(a_.m128, b_.m128));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_eq(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_f32
|
||||
#define vceqq_f32(a, b) simde_vceqq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vceqq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceqq_f64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_float64x2_private
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castpd_si128(_mm_cmpeq_pd(a_.m128d, b_.m128d));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f64x2_eq(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_f64
|
||||
#define vceqq_f64(a, b) simde_vceqq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vceqq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint8x16_private r_;
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_eq(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_s8
|
||||
#define vceqq_s8(a, b) simde_vceqq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vceqq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_int16x8_private
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_eq(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_s16
|
||||
#define vceqq_s16(a, b) simde_vceqq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vceqq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i32x4_eq(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_s32
|
||||
#define vceqq_s32(a, b) simde_vceqq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vceqq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vceqq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_int64x2_private
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_s64
|
||||
#define vceqq_s64(a, b) simde_vceqq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vceqq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint8x16_private r_;
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_u8
|
||||
#define vceqq_u8(a, b) simde_vceqq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vceqq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint16x8_private
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_u16
|
||||
#define vceqq_u16(a, b) simde_vceqq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vceqq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vceqq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint32x4_private
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vceqq_u32
|
||||
#define vceqq_u32(a, b) simde_vceqq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
/* vceqq_u64: lane-wise equality compare of two uint64x2 vectors.
 * All-ones lane where equal, zero otherwise (ARM NEON vceqq_u64,
 * natively available only from AArch64). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqq_u64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    /* 64-bit vec_cmpeq requires POWER8. */
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b));
  #else
    simde_uint64x2_private r_;
    simde_uint64x2_private
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* _mm_cmpeq_epi64 is SSE4.1, not SSE2. */
      r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqq_u64
  #define vceqq_u64(a, b) simde_vceqq_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CEQ_H) */
|
||||
421
lib/simd_wrapper/simde/arm/neon/ceqz.h
Normal file
421
lib/simd_wrapper/simde/arm/neon/ceqz.h
Normal file
@@ -0,0 +1,421 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CEQZ_H)
|
||||
#define SIMDE_ARM_NEON_CEQZ_H
|
||||
|
||||
#include "ceq.h"
|
||||
#include "dup_n.h"
|
||||
#include "types.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vceqz_f16: 64-bit (4-lane) float16 compare-against-zero.
 * Each result lane is all-ones where a == 0, else 0. Implemented by
 * comparing against a broadcast zero vector when no native support. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_f16(simde_float16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    /* Needs both ARMv8 NEON and the FP16 extension. */
    return vceqz_f16(a);
  #else
    return simde_vceq_f16(a, simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0)));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vceqz_f16
  #define vceqz_f16(a) simde_vceqz_f16((a))
#endif

/* vceqz_f32: 64-bit (2-lane) float32 compare-against-zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_f32(a);
  #else
    return simde_vceq_f32(a, simde_vdup_n_f32(0.0f));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_f32
  #define vceqz_f32(a) simde_vceqz_f32((a))
#endif

/* vceqz_f64: 64-bit (1-lane) float64 compare-against-zero (AArch64-only natively). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_f64(a);
  #else
    return simde_vceq_f64(a, simde_vdup_n_f64(0.0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vceqz_f64
  #define vceqz_f64(a) simde_vceqz_f64((a))
#endif
|
||||
|
||||
/* vceqz_s8 .. vceqz_s64: 64-bit signed-integer compare-against-zero.
 * Each of these wrappers forwards to the corresponding vceq against a
 * zero-splatted vector; lanes become all-ones where a == 0, else 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceqz_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_s8(a);
  #else
    return simde_vceq_s8(a, simde_vdup_n_s8(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_s8
  #define vceqz_s8(a) simde_vceqz_s8((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_s16(a);
  #else
    return simde_vceq_s16(a, simde_vdup_n_s16(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_s16
  #define vceqz_s16(a) simde_vceqz_s16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_s32(a);
  #else
    return simde_vceq_s32(a, simde_vdup_n_s32(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_s32
  #define vceqz_s32(a) simde_vceqz_s32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_s64(a);
  #else
    return simde_vceq_s64(a, simde_vdup_n_s64(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_s64
  #define vceqz_s64(a) simde_vceqz_s64((a))
#endif
|
||||
|
||||
/* vceqz_u8 .. vceqz_u64: 64-bit unsigned-integer compare-against-zero,
 * implemented as vceq against a zero-splatted vector when not native. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vceqz_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_u8(a);
  #else
    return simde_vceq_u8(a, simde_vdup_n_u8(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_u8
  #define vceqz_u8(a) simde_vceqz_u8((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vceqz_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_u16(a);
  #else
    return simde_vceq_u16(a, simde_vdup_n_u16(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_u16
  #define vceqz_u16(a) simde_vceqz_u16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vceqz_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_u32(a);
  #else
    return simde_vceq_u32(a, simde_vdup_n_u32(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_u32
  #define vceqz_u32(a) simde_vceqz_u32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vceqz_u64(simde_uint64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqz_u64(a);
  #else
    return simde_vceq_u64(a, simde_vdup_n_u64(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqz_u64
  #define vceqz_u64(a) simde_vceqz_u64((a))
#endif
|
||||
|
||||
/* vceqzq_f16/f32/f64: 128-bit floating-point compare-against-zero.
 * Lanes become all-ones where a == 0, else 0; fallback compares against
 * a zero-splatted vector via the corresponding vceqq. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_f16(simde_float16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    /* Requires the FP16 vector extension in addition to NEON. */
    return vceqzq_f16(a);
  #else
    return simde_vceqq_f16(a, simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0)));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_f16
  #define vceqzq_f16(a) simde_vceqzq_f16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_f32(a);
  #else
    return simde_vceqq_f32(a, simde_vdupq_n_f32(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_f32
  #define vceqzq_f32(a) simde_vceqzq_f32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_f64(a);
  #else
    return simde_vceqq_f64(a, simde_vdupq_n_f64(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_f64
  #define vceqzq_f64(a) simde_vceqzq_f64((a))
#endif
|
||||
|
||||
/* vceqzq_s8 .. vceqzq_s64: 128-bit signed-integer compare-against-zero,
 * implemented via vceqq against a zero-splatted vector when not native. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqzq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_s8(a);
  #else
    return simde_vceqq_s8(a, simde_vdupq_n_s8(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_s8
  #define vceqzq_s8(a) simde_vceqzq_s8((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_s16(a);
  #else
    return simde_vceqq_s16(a, simde_vdupq_n_s16(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_s16
  #define vceqzq_s16(a) simde_vceqzq_s16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_s32(a);
  #else
    return simde_vceqq_s32(a, simde_vdupq_n_s32(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_s32
  #define vceqzq_s32(a) simde_vceqzq_s32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_s64(a);
  #else
    return simde_vceqq_s64(a, simde_vdupq_n_s64(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_s64
  #define vceqzq_s64(a) simde_vceqzq_s64((a))
#endif
|
||||
|
||||
/* vceqzq_u8 .. vceqzq_u64: 128-bit unsigned-integer compare-against-zero,
 * implemented via vceqq against a zero-splatted vector when not native. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vceqzq_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_u8(a);
  #else
    return simde_vceqq_u8(a, simde_vdupq_n_u8(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_u8
  #define vceqzq_u8(a) simde_vceqzq_u8((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vceqzq_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_u16(a);
  #else
    return simde_vceqq_u16(a, simde_vdupq_n_u16(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_u16
  #define vceqzq_u16(a) simde_vceqzq_u16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vceqzq_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_u32(a);
  #else
    return simde_vceqq_u32(a, simde_vdupq_n_u32(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_u32
  #define vceqzq_u32(a) simde_vceqzq_u32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vceqzq_u64(simde_uint64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzq_u64(a);
  #else
    return simde_vceqq_u64(a, simde_vdupq_n_u64(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzq_u64
  #define vceqzq_u64(a) simde_vceqzq_u64((a))
#endif
|
||||
|
||||
/* vceqzd_s64 / vceqzd_u64: scalar 64-bit compare-against-zero.
 * Returns UINT64_MAX when a == 0, 0 otherwise (via the scalar vceqd). */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_s64(int64_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Cast: the native intrinsic's return is widened explicitly. */
    return HEDLEY_STATIC_CAST(uint64_t, vceqzd_s64(a));
  #else
    return simde_vceqd_s64(a, INT64_C(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzd_s64
  #define vceqzd_s64(a) simde_vceqzd_s64((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_u64(uint64_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzd_u64(a);
  #else
    return simde_vceqd_u64(a, UINT64_C(0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzd_u64
  #define vceqzd_u64(a) simde_vceqzd_u64((a))
#endif
|
||||
|
||||
/* vceqzh_f16 / vceqzs_f32 / vceqzd_f64: scalar floating-point
 * compare-against-zero. Returns an all-ones integer of the lane width
 * when a == 0, else 0, by delegating to the scalar vceq helpers. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vceqzh_f16(simde_float16 a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vceqzh_f16(a);
  #else
    return simde_vceqh_f16(a, SIMDE_FLOAT16_VALUE(0.0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vceqzh_f16
  #define vceqzh_f16(a) simde_vceqzh_f16((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vceqzs_f32(simde_float32_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzs_f32(a);
  #else
    return simde_vceqs_f32(a, SIMDE_FLOAT32_C(0.0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzs_f32
  #define vceqzs_f32(a) simde_vceqzs_f32((a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vceqzd_f64(simde_float64_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vceqzd_f64(a);
  #else
    return simde_vceqd_f64(a, SIMDE_FLOAT64_C(0.0));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vceqzd_f64
  #define vceqzd_f64(a) simde_vceqzd_f64((a))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CEQZ_H) */
|
||||
816
lib/simd_wrapper/simde/arm/neon/cge.h
Normal file
816
lib/simd_wrapper/simde/arm/neon/cge.h
Normal file
@@ -0,0 +1,816 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CGE_H)
|
||||
#define SIMDE_ARM_NEON_CGE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vcgeh_f16: scalar float16 greater-than-or-equal.
 * Returns UINT16_MAX when a >= b, else 0. The fallback widens both
 * operands to float32 before comparing. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcgeh_f16(simde_float16_t a, simde_float16_t b){
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return HEDLEY_STATIC_CAST(uint16_t, vcgeh_f16(a, b));
  #else
    return (simde_float16_to_float32(a) >= simde_float16_to_float32(b)) ? UINT16_MAX : 0;
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeh_f16
  #define vcgeh_f16(a, b) simde_vcgeh_f16((a), (b))
#endif

/* vcgeq_f16: 128-bit (8-lane) float16 >= compare; each lane is the
 * result of the scalar vcgeh_f16 on the corresponding operand lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_f16(simde_float16x8_t a, simde_float16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcgeq_f16(a, b);
  #else
    simde_float16x8_private
      a_ = simde_float16x8_to_private(a),
      b_ = simde_float16x8_to_private(b);
    simde_uint16x8_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]);
    }

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_f16
  #define vcgeq_f16(a, b) simde_vcgeq_f16((a), (b))
#endif
|
||||
|
||||
/* vcgeq_f32: 128-bit (4-lane) float32 >= compare.
 * All-ones lane where a >= b, else 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_f32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Float compare result reinterpreted as an integer mask. */
      r_.m128i = _mm_castps_si128(_mm_cmpge_ps(a_.m128, b_.m128));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_f32
  #define vcgeq_f32(a, b) simde_vcgeq_f32((a), (b))
#endif

/* vcgeq_f64: 128-bit (2-lane) float64 >= compare (AArch64-only natively). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_f64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, b_.m128d));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_f64
  #define vcgeq_f64(a, b) simde_vcgeq_f64((a), (b))
#endif
|
||||
|
||||
/* vcgeq_s8/s16/s32: 128-bit signed-integer >= compare.
 * All-ones lane where a >= b, else 0. The SSE2 path synthesizes >= as
 * (a > b) OR (a == b) since SSE2 only has gt/eq integer compares. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);
    simde_uint8x16_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(a_.m128i, b_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s8
  #define vcgeq_s8(a, b) simde_vcgeq_s8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);
    simde_uint16x8_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(a_.m128i, b_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s16
  #define vcgeq_s16(a, b) simde_vcgeq_s16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, b_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s32
  #define vcgeq_s32(a, b) simde_vcgeq_s32((a), (b))
#endif
|
||||
|
||||
/* vcgeq_s64: 128-bit signed 64-bit >= compare.
 * All-ones lane where a >= b, else 0. Native on AArch64; the ARMv7 NEON
 * path derives the mask from the sign of the saturating difference
 * (a -sat- b), negated, since ARMv7 has no 64-bit compare. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_s64(a, b);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* NOTE(review): relies on vqsubq_s64 saturation so the sign bit of
     * (a - b) is correct even when the true difference overflows. */
    return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(a, b), 63))));
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      /* 64-bit integer gt compare requires SSE4.2. */
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(a_.m128i, b_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s64
  #define vcgeq_s64(a, b) simde_vcgeq_s64((a), (b))
#endif
|
||||
|
||||
/* vcgeq_u8/u16/u32: 128-bit unsigned >= compare.
 * All-ones lane where a >= b, else 0. x86 tricks used below:
 *  - min(b, a) == b  ⇔  a >= b (needs unsigned min: epu8 in SSE2,
 *    epu16/epu32 only in SSE4.1);
 *  - otherwise flip the sign bit of both operands so a signed gt/eq
 *    compare gives the unsigned ordering. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* a >= b  ⇔  min_u8(b, a) == b. */
      r_.m128i =
        _mm_cmpeq_epi8(
          _mm_min_epu8(b_.m128i, a_.m128i),
          b_.m128i
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u8x16_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u8
  #define vcgeq_u8(a, b) simde_vcgeq_u8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b  ⇔  min_u16(b, a) == b (min_epu16 is SSE4.1). */
      r_.m128i =
        _mm_cmpeq_epi16(
          _mm_min_epu16(b_.m128i, a_.m128i),
          b_.m128i
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Sign-bit flip converts unsigned ordering to signed. */
      __m128i sign_bits = _mm_set1_epi16(INT16_MIN);
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u16x8_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u16
  #define vcgeq_u16(a, b) simde_vcgeq_u16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* a >= b  ⇔  min_u32(b, a) == b (min_epu32 is SSE4.1). */
      r_.m128i =
        _mm_cmpeq_epi32(
          _mm_min_epu32(b_.m128i, a_.m128i),
          b_.m128i
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Sign-bit flip converts unsigned ordering to signed. */
      __m128i sign_bits = _mm_set1_epi32(INT32_MIN);
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u32x4_ge(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u32
  #define vcgeq_u32(a, b) simde_vcgeq_u32((a), (b))
#endif
|
||||
|
||||
/* vcgeq_u64: 128-bit unsigned 64-bit >= compare.
 * All-ones lane where a >= b, else 0. x86: _mm_min_epu64 needs
 * AVX-512VL; the SSE4.2 path flips sign bits and uses signed gt/eq. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_u64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* a >= b  ⇔  min_u64(b, a) == b. */
      r_.m128i =
        _mm_cmpeq_epi64(
          _mm_min_epu64(b_.m128i, a_.m128i),
          b_.m128i
        );
    #elif defined(SIMDE_X86_SSE4_2_NATIVE)
      /* Sign-bit flip converts unsigned ordering to signed. */
      __m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u64
  #define vcgeq_u64(a, b) simde_vcgeq_u64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise half-precision compare: each 16-bit lane of the result is the
 * all-ones/zero mask produced by simde_vcgeh_f16(a[i], b[i]) (portable vcge_f16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_f16(simde_float16x4_t a, simde_float16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vcge_f16(a, b);
  #else
    simde_float16x4_private
      a_ = simde_float16x4_to_private(a),
      b_ = simde_float16x4_to_private(b);
    simde_uint16x4_private r_;

    /* No SIMD fallback here: fp16 lanes are compared one at a time via the
     * scalar helper, which handles the float16 representation. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]);
    }

    return simde_uint16x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_f16
|
||||
#define vcge_f16(a, b) simde_vcge_f16((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise float32 compare: each 32-bit lane is all-ones (UINT32_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_f32, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_f32(a, b);
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);
    simde_uint32x2_private r_;

    /* GCC bug 100762 miscompiles vector-extension ops on 64-bit vectors on
     * some targets, hence the extra guard. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_f32
|
||||
#define vcge_f32(a, b) simde_vcge_f32((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise float64 compare: the single 64-bit lane is all-ones (UINT64_MAX)
 * when a >= b, else 0 (portable vcge_f64, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_f64(simde_float64x1_t a, simde_float64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_f64(a, b);
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_f64
|
||||
#define vcge_f64(a, b) simde_vcge_f64((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 8-bit compare: each lane is all-ones (UINT8_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_s8, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s8(a, b);
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);
    simde_uint8x8_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX has no >=; synthesize it as (a > b) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(a_.m64, b_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_s8
|
||||
#define vcge_s8(a, b) simde_vcge_s8((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 16-bit compare: each lane is all-ones (UINT16_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_s16, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s16(a, b);
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);
    simde_uint16x4_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX has no >=; synthesize it as (a > b) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(a_.m64, b_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_s16
|
||||
#define vcge_s16(a, b) simde_vcge_s16((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 32-bit compare: each lane is all-ones (UINT32_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_s32, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s32(a, b);
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);
    simde_uint32x2_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX has no >=; synthesize it as (a > b) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(a_.m64, b_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_s32
|
||||
#define vcge_s32(a, b) simde_vcge_s32((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 64-bit compare: the single lane is all-ones (UINT64_MAX)
 * when a >= b, else 0 (portable vcge_s64, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_s64(a, b);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_s64
|
||||
#define vcge_s64(a, b) simde_vcge_s64((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise unsigned 8-bit compare: each lane is all-ones (UINT8_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_u8, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u8(a, b);
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX only compares signed bytes; XOR with the sign bit maps the
       * unsigned ordering onto the signed one, then >= is (a > b) | (a == b). */
      __m64 sign_bits = _mm_set1_pi8(INT8_MIN);
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_u8
|
||||
#define vcge_u8(a, b) simde_vcge_u8((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise unsigned 16-bit compare: each lane is all-ones (UINT16_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_u16, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u16(a, b);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* Signed-only MMX compare; sign-bit XOR converts to unsigned ordering. */
      __m64 sign_bits = _mm_set1_pi16(INT16_MIN);
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_u16
|
||||
#define vcge_u16(a, b) simde_vcge_u16((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise unsigned 32-bit compare: each lane is all-ones (UINT32_MAX) when
 * a[i] >= b[i], else 0 (portable vcge_u32, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u32(a, b);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* Signed-only MMX compare; sign-bit XOR converts to unsigned ordering. */
      __m64 sign_bits = _mm_set1_pi32(INT32_MIN);
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_u32
|
||||
#define vcge_u32(a, b) simde_vcge_u32((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise unsigned 64-bit compare: the single lane is all-ones (UINT64_MAX)
 * when a >= b, else 0 (portable vcge_u64, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_u64(a, b);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcge_u64
|
||||
#define vcge_u64(a, b) simde_vcge_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcged_f64(simde_float64_t a, simde_float64_t b){
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcged_f64(a, b));
|
||||
#else
|
||||
return (a >= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcged_f64
|
||||
#define vcged_f64(a, b) simde_vcged_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcged_s64(int64_t a, int64_t b){
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcged_s64(a, b));
|
||||
#else
|
||||
return (a >= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcged_s64
|
||||
#define vcged_s64(a, b) simde_vcged_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcged_u64(uint64_t a, uint64_t b){
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcged_u64(a, b));
|
||||
#else
|
||||
return (a >= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcged_u64
|
||||
#define vcged_u64(a, b) simde_vcged_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcges_f32(simde_float32_t a, simde_float32_t b){
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcges_f32(a, b));
|
||||
#else
|
||||
return (a >= b) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcges_f32
|
||||
#define vcges_f32(a, b) simde_vcges_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CGE_H) */
|
||||
420
lib/simd_wrapper/simde/arm/neon/cgez.h
Normal file
420
lib/simd_wrapper/simde/arm/neon/cgez.h
Normal file
@@ -0,0 +1,420 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CGEZ_H)
|
||||
#define SIMDE_ARM_NEON_CGEZ_H
|
||||
|
||||
#include "cge.h"
|
||||
#include "dup_n.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgezd_f64(simde_float64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgezd_f64(a));
|
||||
#else
|
||||
return (a >= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezd_f64
|
||||
#define vcgezd_f64(a) simde_vcgezd_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgezd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgezd_s64(a));
|
||||
#else
|
||||
return (a >= 0) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezd_s64
|
||||
#define vcgezd_s64(a) simde_vcgezd_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcgezs_f32(simde_float32_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcgezs_f32(a));
|
||||
#else
|
||||
return (a >= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezs_f32
|
||||
#define vcgezs_f32(a) simde_vcgezs_f32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise float32 >= 0 test: each 32-bit lane is all-ones when
 * a[i] >= 0.0f, else 0 (portable vcgezq_f32, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgezq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_f32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* When any SIMD width is available, reuse the two-operand compare
     * against a zero-splatted vector. */
    return simde_vcgeq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
  #else
    simde_float32x4_private a_ = simde_float32x4_to_private(a);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezs_f32(a_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_f32
|
||||
#define vcgezq_f32(a) simde_vcgezq_f32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise float64 >= 0 test: each 64-bit lane is all-ones when
 * a[i] >= 0.0, else 0 (portable vcgezq_f64, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgezq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_f64(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcgeq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
  #else
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezd_f64(a_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_f64
|
||||
#define vcgezq_f64(a) simde_vcgezq_f64(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 8-bit >= 0 test: each lane is all-ones (UINT8_MAX) when
 * a[i] >= 0, else 0 (portable vcgezq_s8, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgezq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_s8(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcgeq_s8(a, simde_vdupq_n_s8(0));
  #else
    simde_int8x16_private a_ = simde_int8x16_to_private(a);
    simde_uint8x16_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_s8
|
||||
#define vcgezq_s8(a) simde_vcgezq_s8(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 16-bit >= 0 test: each lane is all-ones (UINT16_MAX) when
 * a[i] >= 0, else 0 (portable vcgezq_s16, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgezq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_s16(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcgeq_s16(a, simde_vdupq_n_s16(0));
  #else
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    simde_uint16x8_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_s16
|
||||
#define vcgezq_s16(a) simde_vcgezq_s16(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 32-bit >= 0 test: each lane is all-ones (UINT32_MAX) when
 * a[i] >= 0, else 0 (portable vcgezq_s32, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgezq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_s32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcgeq_s32(a, simde_vdupq_n_s32(0));
  #else
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_s32
|
||||
#define vcgezq_s32(a) simde_vcgezq_s32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 64-bit >= 0 test: each lane is all-ones (UINT64_MAX) when
 * a[i] >= 0, else 0 (portable vcgezq_s64, 128-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgezq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgezq_s64(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcgeq_s64(a, simde_vdupq_n_s64(0));
  #else
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezd_s64(a_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgezq_s64
|
||||
#define vcgezq_s64(a) simde_vcgezq_s64(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise float32 >= 0 test: each lane is all-ones when a[i] >= 0.0f,
 * else 0 (portable vcgez_f32, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgez_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_f32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
  #else
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    simde_uint32x2_private r_;

    /* GCC bug 100762 miscompiles vector-extension ops on 64-bit vectors. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezs_f32(a_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_f32
|
||||
#define vcgez_f32(a) simde_vcgez_f32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise float64 >= 0 test: the single lane is all-ones when a >= 0.0,
 * else 0 (portable vcgez_f64, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgez_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_f64(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
  #else
    simde_float64x1_private a_ = simde_float64x1_to_private(a);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezd_f64(a_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_f64
|
||||
#define vcgez_f64(a) simde_vcgez_f64(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 8-bit >= 0 test: each lane is all-ones (UINT8_MAX) when
 * a[i] >= 0, else 0 (portable vcgez_s8, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgez_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_s8(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_s8(a, simde_vdup_n_s8(0));
  #else
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    simde_uint8x8_private r_;

    /* GCC bug 100762 miscompiles vector-extension ops on 64-bit vectors. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_s8
|
||||
#define vcgez_s8(a) simde_vcgez_s8(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 16-bit >= 0 test: each lane is all-ones (UINT16_MAX) when
 * a[i] >= 0, else 0 (portable vcgez_s16, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgez_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_s16(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_s16(a, simde_vdup_n_s16(0));
  #else
    simde_int16x4_private a_ = simde_int16x4_to_private(a);
    simde_uint16x4_private r_;

    /* GCC bug 100762 miscompiles vector-extension ops on 64-bit vectors. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_s16
|
||||
#define vcgez_s16(a) simde_vcgez_s16(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 32-bit >= 0 test: each lane is all-ones (UINT32_MAX) when
 * a[i] >= 0, else 0 (portable vcgez_s32, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgez_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_s32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_s32(a, simde_vdup_n_s32(0));
  #else
    simde_int32x2_private a_ = simde_int32x2_to_private(a);
    simde_uint32x2_private r_;

    /* GCC bug 100762 miscompiles vector-extension ops on 64-bit vectors. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_s32
|
||||
#define vcgez_s32(a) simde_vcgez_s32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise signed 64-bit >= 0 test: the single lane is all-ones (UINT64_MAX)
 * when a >= 0, else 0 (portable vcgez_s64, 64-bit vector). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgez_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgez_s64(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    /* Delegate to the two-operand compare against a zero splat. */
    return simde_vcge_s64(a, simde_vdup_n_s64(0));
  #else
    simde_int64x1_private a_ = simde_int64x1_to_private(a);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgezd_s64(a_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgez_s64
|
||||
#define vcgez_s64(a) simde_vcgez_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CGEZ_H) */
|
||||
743
lib/simd_wrapper/simde/arm/neon/cgt.h
Normal file
743
lib/simd_wrapper/simde/arm/neon/cgt.h
Normal file
@@ -0,0 +1,743 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CGT_H)
|
||||
#define SIMDE_ARM_NEON_CGT_H
|
||||
|
||||
#include "combine.h"
|
||||
#include "get_low.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgtd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_f64(a, b));
|
||||
#else
|
||||
return (a > b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtd_f64
|
||||
#define vcgtd_f64(a, b) simde_vcgtd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgtd_s64(int64_t a, int64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_s64(a, b));
|
||||
#else
|
||||
return (a > b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtd_s64
|
||||
#define vcgtd_s64(a, b) simde_vcgtd_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgtd_u64(uint64_t a, uint64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgtd_u64(a, b));
|
||||
#else
|
||||
return (a > b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtd_u64
|
||||
#define vcgtd_u64(a, b) simde_vcgtd_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcgts_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcgts_f32(a, b));
|
||||
#else
|
||||
return (a > b) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgts_f32
|
||||
#define vcgts_f32(a, b) simde_vcgts_f32((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise a > b on four float32 lanes; each true lane becomes all ones
 * (UINT32_MAX), each false lane 0.  Dispatches to NEON, AltiVec, SSE2,
 * WASM SIMD, GCC vector extensions, or a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_f32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpgt_ps already yields an all-ones/all-zeros mask; just reinterpret. */
      r_.m128i = _mm_castps_si128(_mm_cmpgt_ps(a_.m128, b_.m128));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_f32
  #define vcgtq_f32(a, b) simde_vcgtq_f32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on two float64 lanes; each true lane becomes all ones
 * (UINT64_MAX), each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtq_f64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_castpd_si128(_mm_cmpgt_pd(a_.m128d, b_.m128d));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_f64
  #define vcgtq_f64(a, b) simde_vcgtq_f64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on sixteen int8 lanes; each true lane becomes UINT8_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b));
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);
    simde_uint8x16_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_cmpgt_epi8(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_s8
  #define vcgtq_s8(a, b) simde_vcgtq_s8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on eight int16 lanes; each true lane becomes UINT16_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_s16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b));
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);
    simde_uint16x8_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_cmpgt_epi16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_s16
  #define vcgtq_s16(a, b) simde_vcgtq_s16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on four int32 lanes; each true lane becomes UINT32_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_s32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_cmpgt_epi32(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_s32
  #define vcgtq_s32(a, b) simde_vcgtq_s32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on two int64 lanes; each true lane becomes UINT64_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtq_s64(a, b);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* A32 has no 64-bit compare: saturating b - a is negative iff a > b,
     * so an arithmetic shift by 63 broadcasts that sign to all bits. */
    return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(b, a), 63));
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      r_.m128i = _mm_cmpgt_epi64(a_.m128i, b_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Emulate 64-bit signed compare with 32-bit ops:
       * https://stackoverflow.com/a/65175746/501126 */
      __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a_.m128i, b_.m128i), _mm_sub_epi64(b_.m128i, a_.m128i));
      r = _mm_or_si128(r, _mm_cmpgt_epi32(a_.m128i, b_.m128i));
      /* Broadcast each 64-bit lane's high 32-bit verdict to the full lane. */
      r_.m128i = _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_s64
  #define vcgtq_s64(a, b) simde_vcgtq_s64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on sixteen uint8 lanes; each true lane becomes
 * UINT8_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Saturating subtract is nonzero exactly when a > b (unsigned);
       * adding its negation with unsigned saturation turns any nonzero
       * lane into all ones. */
      __m128i tmp = _mm_subs_epu8(a_.m128i, b_.m128i);
      r_.m128i = _mm_adds_epu8(tmp, _mm_sub_epi8(_mm_setzero_si128(), tmp));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u8x16_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_u8
  #define vcgtq_u8(a, b) simde_vcgtq_u8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on eight uint16 lanes; each true lane becomes
 * UINT16_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b));
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Same saturating-subtract trick as the u8 variant: tmp != 0 iff
       * a > b, then saturate the lane to all ones. */
      __m128i tmp = _mm_subs_epu16(a_.m128i, b_.m128i);
      r_.m128i = _mm_adds_epu16(tmp, _mm_sub_epi16(_mm_setzero_si128(), tmp));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u16x8_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_u16
  #define vcgtq_u16(a, b) simde_vcgtq_u16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on four uint32 lanes; each true lane becomes
 * UINT32_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgtq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b));
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 only has a signed compare; the signed result is wrong exactly
       * when the operands' sign bits differ, so XOR with that difference
       * (broadcast via arithmetic shift) flips those lanes. */
      r_.m128i =
        _mm_xor_si128(
          _mm_cmpgt_epi32(a_.m128i, b_.m128i),
          _mm_srai_epi32(_mm_xor_si128(a_.m128i, b_.m128i), 31)
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u32x4_gt(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_u32
  #define vcgtq_u32(a, b) simde_vcgtq_u32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on two uint64 lanes; each true lane becomes
 * UINT64_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtq_u64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b));
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      /* Flipping the sign bit maps the unsigned ordering onto the signed
       * one, so the signed cmpgt gives the unsigned answer. */
      __m128i sign_bit = _mm_set1_epi64x(INT64_MIN);
      r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bit), _mm_xor_si128(b_.m128i, sign_bit));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtq_u64
  #define vcgtq_u64(a, b) simde_vcgtq_u64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on two float32 lanes (64-bit vector); each true lane
 * becomes UINT32_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_f32(a, b);
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);
    simde_uint32x2_private r_;

    /* GCC bug 100762 miscompiles vector ops on 64-bit vectors; fall back
     * to the scalar loop there. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_f32
  #define vcgt_f32(a, b) simde_vcgt_f32((a), (b))
#endif
|
||||
|
||||
/* Single-lane a > b on a one-element float64 vector; the lane becomes
 * UINT64_MAX when true, 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_f64(simde_float64x1_t a, simde_float64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgt_f64(a, b);
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgt_f64
  #define vcgt_f64(a, b) simde_vcgt_f64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on eight int8 lanes (64-bit vector); each true lane
 * becomes UINT8_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgt_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_s8(a, b);
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);
    simde_uint8x8_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_cmpgt_pi8(a_.m64, b_.m64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_s8
  #define vcgt_s8(a, b) simde_vcgt_s8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on four int16 lanes (64-bit vector); each true lane
 * becomes UINT16_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgt_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_s16(a, b);
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);
    simde_uint16x4_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_cmpgt_pi16(a_.m64, b_.m64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_s16
  #define vcgt_s16(a, b) simde_vcgt_s16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise a > b on two int32 lanes (64-bit vector); each true lane
 * becomes UINT32_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_s32(a, b);
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);
    simde_uint32x2_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      r_.m64 = _mm_cmpgt_pi32(a_.m64, b_.m64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_s32
  #define vcgt_s32(a, b) simde_vcgt_s32((a), (b))
#endif
|
||||
|
||||
/* Single-lane a > b on a one-element int64 vector; the lane becomes
 * UINT64_MAX when true, 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgt_s64(a, b);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgt_s64
  #define vcgt_s64(a, b) simde_vcgt_s64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on eight uint8 lanes (64-bit vector); each true
 * lane becomes UINT8_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgt_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_u8(a, b);
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX compare is signed only; XOR-ing the sign bit into both operands
       * maps the unsigned ordering onto the signed one. */
      __m64 sign_bit = _mm_set1_pi8(INT8_MIN);
      r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_u8
  #define vcgt_u8(a, b) simde_vcgt_u8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on four uint16 lanes (64-bit vector); each true
 * lane becomes UINT16_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgt_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_u16(a, b);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* Sign-bit flip converts the signed MMX compare into unsigned. */
      __m64 sign_bit = _mm_set1_pi16(INT16_MIN);
      r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_u16
  #define vcgt_u16(a, b) simde_vcgt_u16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned a > b on two uint32 lanes (64-bit vector); each true
 * lane becomes UINT32_MAX, each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgt_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgt_u32(a, b);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* Sign-bit flip converts the signed MMX compare into unsigned. */
      __m64 sign_bit = _mm_set1_pi32(INT32_MIN);
      r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgt_u32
  #define vcgt_u32(a, b) simde_vcgt_u32((a), (b))
#endif
|
||||
|
||||
/* Single-lane unsigned a > b on a one-element uint64 vector; the lane
 * becomes UINT64_MAX when true, 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgt_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgt_u64(a, b);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgt_u64
  #define vcgt_u64(a, b) simde_vcgt_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CGT_H) */
|
||||
422
lib/simd_wrapper/simde/arm/neon/cgtz.h
Normal file
422
lib/simd_wrapper/simde/arm/neon/cgtz.h
Normal file
@@ -0,0 +1,422 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CGTZ_H)
|
||||
#define SIMDE_ARM_NEON_CGTZ_H
|
||||
|
||||
#include "cgt.h"
|
||||
#include "combine.h"
|
||||
#include "dup_n.h"
|
||||
#include "get_low.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgtzd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_s64(a));
|
||||
#else
|
||||
return (a > 0) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtzd_s64
|
||||
#define vcgtzd_s64(a) simde_vcgtzd_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcgtzd_f64(simde_float64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_f64(a));
|
||||
#else
|
||||
return (a > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtzd_f64
|
||||
#define vcgtzd_f64(a) simde_vcgtzd_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcgtzs_f32(simde_float32_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcgtzs_f32(a));
|
||||
#else
|
||||
return (a > SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcgtzs_f32
|
||||
#define vcgtzs_f32(a) simde_vcgtzs_f32(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise a > 0 on four float32 lanes; each true lane becomes UINT32_MAX,
 * each false lane 0.  When any vector backend is available this defers to
 * vcgtq_f32 against a zero vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtzq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_f32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
  #else
    simde_float32x4_private a_ = simde_float32x4_to_private(a);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_f32
  #define vcgtzq_f32(a) simde_vcgtzq_f32(a)
#endif
|
||||
|
||||
/* Lane-wise a > 0 on two float64 lanes; each true lane becomes UINT64_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtzq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_f64(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
  #else
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_f64
  #define vcgtzq_f64(a) simde_vcgtzq_f64(a)
#endif
|
||||
|
||||
/* Lane-wise a > 0 on sixteen int8 lanes; each true lane becomes UINT8_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgtzq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_s8(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_s8(a, simde_vdupq_n_s8(0));
  #else
    simde_int8x16_private a_ = simde_int8x16_to_private(a);
    simde_uint8x16_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_s8
  #define vcgtzq_s8(a) simde_vcgtzq_s8(a)
#endif
|
||||
|
||||
/* Lane-wise a > 0 on eight int16 lanes; each true lane becomes UINT16_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtzq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_s16(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_s16(a, simde_vdupq_n_s16(0));
  #else
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    simde_uint16x8_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_s16
  #define vcgtzq_s16(a) simde_vcgtzq_s16(a)
#endif
|
||||
|
||||
/* Lane-wise a > 0 on four int32 lanes; each true lane becomes UINT32_MAX,
 * each false lane 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtzq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_s32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_s32(a, simde_vdupq_n_s32(0));
  #else
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_s32
  #define vcgtzq_s32(a) simde_vcgtzq_s32(a)
#endif
|
||||
|
||||
/* vcgtzq_s64: signed 64-bit lanes of a that are > 0 become all-ones
 * (UINT64_MAX); other lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgtzq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtzq_s64(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgtq_s64(a, simde_vdupq_n_s64(0));
  #else
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Per-lane fallback goes through the scalar helper. */
        r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtzq_s64
  #define vcgtzq_s64(a) simde_vcgtzq_s64(a)
#endif
|
||||
|
||||
/* vcgtz_f32 (64-bit vector): float lanes of a that are > 0.0 become all-ones
 * (UINT32_MAX); other lanes (including NaN compares) become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgtz_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_f32(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
  #else
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    simde_uint32x2_private r_;

    /* SIMDE_BUG_GCC_100762: avoid the vector-extension path on affected GCCs. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_f32
  #define vcgtz_f32(a) simde_vcgtz_f32(a)
#endif
|
||||
|
||||
/* vcgtz_f64 (one-lane vector): the double lane of a that is > 0.0 becomes
 * all-ones (UINT64_MAX); otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgtz_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_f64(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
  #else
    simde_float64x1_private a_ = simde_float64x1_to_private(a);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_f64
  #define vcgtz_f64(a) simde_vcgtz_f64(a)
#endif
|
||||
|
||||
/* vcgtz_s8 (64-bit vector): signed 8-bit lanes of a that are > 0 become
 * all-ones (UINT8_MAX); other lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcgtz_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_s8(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_s8(a, simde_vdup_n_s8(0));
  #else
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    simde_uint8x8_private r_;

    /* SIMDE_BUG_GCC_100762: skip the vector-extension path on affected GCCs. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_s8
  #define vcgtz_s8(a) simde_vcgtz_s8(a)
#endif
|
||||
|
||||
/* vcgtz_s16 (64-bit vector): signed 16-bit lanes of a that are > 0 become
 * all-ones (UINT16_MAX); other lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgtz_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_s16(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_s16(a, simde_vdup_n_s16(0));
  #else
    simde_int16x4_private a_ = simde_int16x4_to_private(a);
    simde_uint16x4_private r_;

    /* SIMDE_BUG_GCC_100762: skip the vector-extension path on affected GCCs. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_s16
  #define vcgtz_s16(a) simde_vcgtz_s16(a)
#endif
|
||||
|
||||
/* vcgtz_s32 (64-bit vector): signed 32-bit lanes of a that are > 0 become
 * all-ones (UINT32_MAX); other lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcgtz_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_s32(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_s32(a, simde_vdup_n_s32(0));
  #else
    simde_int32x2_private a_ = simde_int32x2_to_private(a);
    simde_uint32x2_private r_;

    /* SIMDE_BUG_GCC_100762: skip the vector-extension path on affected GCCs. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_s32
  #define vcgtz_s32(a) simde_vcgtz_s32(a)
#endif
|
||||
|
||||
/* vcgtz_s64 (one-lane vector): the signed 64-bit lane of a that is > 0
 * becomes all-ones (UINT64_MAX); otherwise 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcgtz_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgtz_s64(a);  /* native AArch64 NEON */
  #elif SIMDE_NATURAL_VECTOR_SIZE > 0
    return simde_vcgt_s64(a, simde_vdup_n_s64(0));
  #else
    simde_int64x1_private a_ = simde_int64x1_to_private(a);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgtz_s64
  #define vcgtz_s64(a) simde_vcgtz_s64(a)
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CGTZ_H) */
|
||||
776
lib/simd_wrapper/simde/arm/neon/cle.h
Normal file
776
lib/simd_wrapper/simde/arm/neon/cle.h
Normal file
@@ -0,0 +1,776 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLE_H)
|
||||
#define SIMDE_ARM_NEON_CLE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcled_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcled_f64(a, b));
|
||||
#else
|
||||
return (a <= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcled_f64
|
||||
#define vcled_f64(a, b) simde_vcled_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcled_s64(int64_t a, int64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcled_s64(a, b));
|
||||
#else
|
||||
return (a <= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcled_s64
|
||||
#define vcled_s64(a, b) simde_vcled_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcled_u64(uint64_t a, uint64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcled_u64(a, b));
|
||||
#else
|
||||
return (a <= b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcled_u64
|
||||
#define vcled_u64(a, b) simde_vcled_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcles_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcles_f32(a, b));
|
||||
#else
|
||||
return (a <= b) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcles_f32
|
||||
#define vcles_f32(a, b) simde_vcles_f32((a), (b))
|
||||
#endif
|
||||
|
||||
/* vcleq_f32: lane-wise a <= b on float lanes; true lanes become all-ones
 * (UINT32_MAX), false lanes (including NaN compares) become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_f32(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_castps_si128(_mm_cmple_ps(a_.m128, b_.m128));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_f32
  #define vcleq_f32(a, b) simde_vcleq_f32((a), (b))
#endif
||||
|
||||
/* vcleq_f64: lane-wise a <= b on double lanes; true lanes become all-ones
 * (UINT64_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcleq_f64(a, b);  /* native AArch64 NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_castpd_si128(_mm_cmple_pd(a_.m128d, b_.m128d));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcleq_f64
  #define vcleq_f64(a, b) simde_vcleq_f64((a), (b))
#endif
||||
|
||||
/* vcleq_s8: lane-wise signed a <= b; true lanes become all-ones (UINT8_MAX),
 * false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcleq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_s8(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b));
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);
    simde_uint8x16_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 has no "less or equal" for integers: build it as (b > a) | (a == b). */
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(b_.m128i, a_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_s8
  #define vcleq_s8(a, b) simde_vcleq_s8((a), (b))
#endif
||||
|
||||
/* vcleq_s16: lane-wise signed a <= b; true lanes become all-ones
 * (UINT16_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcleq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_s16(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b));
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);
    simde_uint16x8_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(b_.m128i, a_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_s16
  #define vcleq_s16(a, b) simde_vcleq_s16((a), (b))
#endif
||||
|
||||
/* vcleq_s32: lane-wise signed a <= b; true lanes become all-ones
 * (UINT32_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_s32(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(b_.m128i, a_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_s32
  #define vcleq_s32(a, b) simde_vcleq_s32((a), (b))
#endif
||||
|
||||
/* vcleq_s64: lane-wise signed 64-bit a <= b; true lanes become all-ones
 * (UINT64_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcleq_s64(a, b);  /* native AArch64 NEON */
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* ARMv7 has no 64-bit compare: saturating b - a is negative iff a > b,
     * so its replicated sign bit, inverted, is the a <= b mask. */
    return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(b, a), 63))));
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_X86_SSE4_2_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(b_.m128i, a_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcleq_s64
  #define vcleq_s64(a, b) simde_vcleq_s64((a), (b))
#endif
||||
|
||||
/* vcleq_u8: lane-wise unsigned a <= b; true lanes become all-ones
 * (UINT8_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcleq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_u8(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* http://www.alfredklomp.com/programming/sse-intrinsics/
       * min(a, b) == a  <=>  a <= b (SSE2 lacks an unsigned compare). */
      r_.m128i =
        _mm_cmpeq_epi8(
          _mm_min_epu8(a_.m128i, b_.m128i),
          a_.m128i
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u8x16_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_u8
  #define vcleq_u8(a, b) simde_vcleq_u8((a), (b))
#endif
||||
|
||||
/* vcleq_u16: lane-wise unsigned a <= b; true lanes become all-ones
 * (UINT16_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcleq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_u16(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b));
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* min(a, b) == a  <=>  a <= b. */
      r_.m128i =
        _mm_cmpeq_epi16(
          _mm_min_epu16(a_.m128i, b_.m128i),
          a_.m128i
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* XOR the sign bit into both operands so a signed compare orders
       * the unsigned values, then build a <= b as (b > a) | (a == b). */
      __m128i sign_bits = _mm_set1_epi16(INT16_MIN);
      r_.m128i =
        _mm_or_si128(
          _mm_cmpgt_epi16(
            _mm_xor_si128(b_.m128i, sign_bits),
            _mm_xor_si128(a_.m128i, sign_bits)
          ),
          _mm_cmpeq_epi16(a_.m128i, b_.m128i)
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u16x8_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_u16
  #define vcleq_u16(a, b) simde_vcleq_u16((a), (b))
#endif
||||
|
||||
/* vcleq_u32: lane-wise unsigned a <= b; true lanes become all-ones
 * (UINT32_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcleq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcleq_u32(a, b);  /* native ARM NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b));
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* min(a, b) == a  <=>  a <= b. */
      r_.m128i =
        _mm_cmpeq_epi32(
          _mm_min_epu32(a_.m128i, b_.m128i),
          a_.m128i
        );
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Bias both operands by the sign bit so the signed compare orders
       * unsigned values, then build a <= b as (b > a) | (a == b). */
      __m128i sign_bits = _mm_set1_epi32(INT32_MIN);
      r_.m128i =
        _mm_or_si128(
          _mm_cmpgt_epi32(
            _mm_xor_si128(b_.m128i, sign_bits),
            _mm_xor_si128(a_.m128i, sign_bits)
          ),
          _mm_cmpeq_epi32(a_.m128i, b_.m128i)
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u32x4_le(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcleq_u32
  #define vcleq_u32(a, b) simde_vcleq_u32((a), (b))
#endif
||||
|
||||
/* vcleq_u64: lane-wise unsigned 64-bit a <= b; true lanes become all-ones
 * (UINT64_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcleq_u64(a, b);  /* native AArch64 NEON */
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b));
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* min(a, b) == a  <=>  a <= b. */
      r_.m128i =
        _mm_cmpeq_epi64(
          _mm_min_epu64(a_.m128i, b_.m128i),
          a_.m128i
        );
    #elif defined(SIMDE_X86_SSE4_2_NATIVE)
      /* Bias both operands by the sign bit so the signed 64-bit compare
       * orders unsigned values, then build a <= b as (b > a) | (a == b). */
      __m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
      r_.m128i =
        _mm_or_si128(
          _mm_cmpgt_epi64(
            _mm_xor_si128(b_.m128i, sign_bits),
            _mm_xor_si128(a_.m128i, sign_bits)
          ),
          _mm_cmpeq_epi64(a_.m128i, b_.m128i)
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcleq_u64
  #define vcleq_u64(a, b) simde_vcleq_u64((a), (b))
#endif
||||
|
||||
/* vcle_f32 (64-bit vector): lane-wise a <= b on float lanes; true lanes
 * become all-ones (UINT32_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcle_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcle_f32(a, b);  /* native ARM NEON */
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);
    simde_uint32x2_private r_;

    /* SIMDE_BUG_GCC_100762: skip the vector-extension path on affected GCCs. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcle_f32
  #define vcle_f32(a, b) simde_vcle_f32((a), (b))
#endif
||||
|
||||
/* vcle_f64 (one-lane vector): a <= b on the double lane; true becomes
 * all-ones (UINT64_MAX), false becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcle_f64(simde_float64x1_t a, simde_float64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcle_f64(a, b);  /* native AArch64 NEON */
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcle_f64
  #define vcle_f64(a, b) simde_vcle_f64((a), (b))
#endif
||||
|
||||
/* vcle_s8 (64-bit vector): lane-wise signed a <= b; true lanes become
 * all-ones (UINT8_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcle_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcle_s8(a, b);  /* native ARM NEON */
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);
    simde_uint8x8_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(b_.m64, a_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcle_s8
  #define vcle_s8(a, b) simde_vcle_s8((a), (b))
#endif
||||
|
||||
/* vcle_s16 (64-bit vector): lane-wise signed a <= b; true lanes become
 * all-ones (UINT16_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcle_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcle_s16(a, b);  /* native ARM NEON */
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);
    simde_uint16x4_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(b_.m64, a_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcle_s16
  #define vcle_s16(a, b) simde_vcle_s16((a), (b))
#endif
||||
|
||||
/* vcle_s32 (64-bit vector): lane-wise signed a <= b; true lanes become
 * all-ones (UINT32_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcle_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcle_s32(a, b);  /* native ARM NEON */
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);
    simde_uint32x2_private r_;

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* a <= b expressed as (b > a) | (a == b). */
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(b_.m64, a_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcle_s32
  #define vcle_s32(a, b) simde_vcle_s32((a), (b))
#endif
||||
|
||||
/* vcle_s64 (one-lane vector): signed 64-bit a <= b; true becomes all-ones
 * (UINT64_MAX), false becomes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcle_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcle_s64(a, b);  /* native AArch64 NEON */
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]);
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcle_s64
  #define vcle_s64(a, b) simde_vcle_s64((a), (b))
#endif
||||
|
||||
/* vcle_u8 (64-bit vector): lane-wise unsigned a <= b; true lanes become
 * all-ones (UINT8_MAX), false lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcle_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcle_u8(a, b);  /* native ARM NEON */
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* MMX has only signed compares: XOR the sign bit into both operands so
       * the signed compare orders unsigned values; a <= b is (b > a) | (a == b). */
      __m64 sign_bits = _mm_set1_pi8(INT8_MIN);
      r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcle_u8
  #define vcle_u8(a, b) simde_vcle_u8((a), (b))
#endif
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vcle_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcle_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
__m64 sign_bits = _mm_set1_pi16(INT16_MIN);
|
||||
r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcle_u16
|
||||
#define vcle_u16(a, b) simde_vcle_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcle_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcle_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
__m64 sign_bits = _mm_set1_pi32(INT32_MIN);
|
||||
r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcle_u32
|
||||
#define vcle_u32(a, b) simde_vcle_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcle_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcle_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcle_u64
|
||||
#define vcle_u64(a, b) simde_vcle_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLE_H) */
|
||||
420
lib/simd_wrapper/simde/arm/neon/clez.h
Normal file
420
lib/simd_wrapper/simde/arm/neon/clez.h
Normal file
@@ -0,0 +1,420 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLEZ_H)
|
||||
#define SIMDE_ARM_NEON_CLEZ_H
|
||||
|
||||
#include "cle.h"
|
||||
#include "dup_n.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vclezd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vclezd_s64(a));
|
||||
#else
|
||||
return (a <= 0) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezd_s64
|
||||
#define vclezd_s64(a) simde_vclezd_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vclezd_f64(simde_float64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vclezd_f64(a));
|
||||
#else
|
||||
return (a <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezd_f64
|
||||
#define vclezd_f64(a) simde_vclezd_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vclezs_f32(simde_float32_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vclezs_f32(a));
|
||||
#else
|
||||
return (a <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezs_f32
|
||||
#define vclezs_f32(a) simde_vclezs_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vclezq_f32(simde_float32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_f32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
|
||||
#else
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_f32
|
||||
#define vclezq_f32(a) simde_vclezq_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vclezq_f64(simde_float64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_f64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
|
||||
#else
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_f64
|
||||
#define vclezq_f64(a) simde_vclezq_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vclezq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_s8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_s8(a, simde_vdupq_n_s8(0));
|
||||
#else
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
simde_uint8x16_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_s8
|
||||
#define vclezq_s8(a) simde_vclezq_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vclezq_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_s16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_s16(a, simde_vdupq_n_s16(0));
|
||||
#else
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
simde_uint16x8_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_s16
|
||||
#define vclezq_s16(a) simde_vclezq_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vclezq_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_s32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_s32(a, simde_vdupq_n_s32(0));
|
||||
#else
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_s32
|
||||
#define vclezq_s32(a) simde_vclezq_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vclezq_s64(simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclezq_s64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcleq_s64(a, simde_vdupq_n_s64(0));
|
||||
#else
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT64_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclezq_s64
|
||||
#define vclezq_s64(a) simde_vclezq_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclez_f32(simde_float32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_f32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
|
||||
#else
|
||||
simde_float32x2_private a_ = simde_float32x2_to_private(a);
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_f32
|
||||
#define vclez_f32(a) simde_vclez_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vclez_f64(simde_float64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_f64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
|
||||
#else
|
||||
simde_float64x1_private a_ = simde_float64x1_to_private(a);
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_f64
|
||||
#define vclez_f64(a) simde_vclez_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vclez_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_s8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_s8(a, simde_vdup_n_s8(0));
|
||||
#else
|
||||
simde_int8x8_private a_ = simde_int8x8_to_private(a);
|
||||
simde_uint8x8_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_s8
|
||||
#define vclez_s8(a) simde_vclez_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vclez_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_s16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_s16(a, simde_vdup_n_s16(0));
|
||||
#else
|
||||
simde_int16x4_private a_ = simde_int16x4_to_private(a);
|
||||
simde_uint16x4_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_s16
|
||||
#define vclez_s16(a) simde_vclez_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclez_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_s32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_s32(a, simde_vdup_n_s32(0));
|
||||
#else
|
||||
simde_int32x2_private a_ = simde_int32x2_to_private(a);
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_s32
|
||||
#define vclez_s32(a) simde_vclez_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vclez_s64(simde_int64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclez_s64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcle_s64(a, simde_vdup_n_s64(0));
|
||||
#else
|
||||
simde_int64x1_private a_ = simde_int64x1_to_private(a);
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] <= 0) ? UINT64_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclez_s64
|
||||
#define vclez_s64(a) simde_vclez_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLEZ_H) */
|
||||
148
lib/simd_wrapper/simde/arm/neon/cls.h
Normal file
148
lib/simd_wrapper/simde/arm/neon/cls.h
Normal file
@@ -0,0 +1,148 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLS_H)
|
||||
#define SIMDE_ARM_NEON_CLS_H
|
||||
|
||||
#include "types.h"
|
||||
#include "bsl.h"
|
||||
#include "clz.h"
|
||||
#include "cltz.h"
|
||||
#include "dup_n.h"
|
||||
#include "mvn.h"
|
||||
#include "sub.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vcls_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcls_s8(a);
|
||||
#else
|
||||
return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcls_s8
|
||||
#define vcls_s8(a) simde_vcls_s8(a)
|
||||
#undef vcls_u8
|
||||
#define vcls_u8(a) simde_vcls_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vcls_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcls_s16(a);
|
||||
#else
|
||||
return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcls_s16
|
||||
#define vcls_s16(a) simde_vcls_s16(a)
|
||||
#undef vcls_u16
|
||||
#define vcls_u16(a) simde_vcls_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vcls_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcls_s32(a);
|
||||
#else
|
||||
return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcls_s32
|
||||
#define vcls_s32(a) simde_vcls_s32(a)
|
||||
#undef vcls_u32
|
||||
#define vcls_u32(a) simde_vcls_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vclsq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclsq_s8(a);
|
||||
#else
|
||||
return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclsq_s8
|
||||
#define vclsq_s8(a) simde_vclsq_s8(a)
|
||||
#undef vclsq_u8
|
||||
#define vclsq_u8(a) simde_vclsq_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vclsq_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclsq_s16(a);
|
||||
#else
|
||||
return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclsq_s16
|
||||
#define vclsq_s16(a) simde_vclsq_s16(a)
|
||||
#undef vclsq_u16
|
||||
#define vclsq_u16(a) simde_vclsq_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vclsq_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclsq_s32(a);
|
||||
#else
|
||||
return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1)));
|
||||
#endif
|
||||
}
|
||||
#define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a))
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclsq_s32
|
||||
#define vclsq_s32(a) simde_vclsq_s32(a)
|
||||
#undef vclsq_u32
|
||||
#define vclsq_u32(a) simde_vclsq_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLS_H) */
|
||||
751
lib/simd_wrapper/simde/arm/neon/clt.h
Normal file
751
lib/simd_wrapper/simde/arm/neon/clt.h
Normal file
@@ -0,0 +1,751 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLT_H)
|
||||
#define SIMDE_ARM_NEON_CLT_H
|
||||
|
||||
#include "combine.h"
|
||||
#include "get_low.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcltd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcltd_f64(a, b));
|
||||
#else
|
||||
return (a < b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltd_f64
|
||||
#define vcltd_f64(a, b) simde_vcltd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcltd_s64(int64_t a, int64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcltd_s64(a, b));
|
||||
#else
|
||||
return (a < b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltd_s64
|
||||
#define vcltd_s64(a, b) simde_vcltd_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcltd_u64(uint64_t a, uint64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcltd_u64(a, b));
|
||||
#else
|
||||
return (a < b) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltd_u64
|
||||
#define vcltd_u64(a, b) simde_vcltd_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vclts_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vclts_f32(a, b));
|
||||
#else
|
||||
return (a < b) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclts_f32
|
||||
#define vclts_f32(a, b) simde_vclts_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcltq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_f32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_float32x4_private
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castps_si128(_mm_cmplt_ps(a_.m128, b_.m128));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_f32
|
||||
#define vcltq_f32(a, b) simde_vcltq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcltq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltq_f64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_float64x2_private
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castpd_si128(_mm_cmplt_pd(a_.m128d, b_.m128d));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f64x2_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_f64
|
||||
#define vcltq_f64(a, b) simde_vcltq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vcltq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
simde_uint8x16_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmplt_epi8(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_s8
|
||||
#define vcltq_s8(a, b) simde_vcltq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vcltq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_int16x8_private
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
simde_uint16x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmplt_epi16(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_s16
|
||||
#define vcltq_s16(a, b) simde_vcltq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcltq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_cmplt_epi32(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i32x4_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_s32
|
||||
#define vcltq_s32(a, b) simde_vcltq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcltq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltq_s64(a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(a, b), 63));
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_int64x2_private
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b);
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
r_.m128i = _mm_cmpgt_epi64(b_.m128i, a_.m128i);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_s64
|
||||
#define vcltq_s64(a, b) simde_vcltq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vcltq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(
|
||||
_mm_cmpeq_epi8(b_.m128i, a_.m128i),
|
||||
_mm_cmpeq_epi8(_mm_max_epu8(b_.m128i, a_.m128i), b_.m128i)
|
||||
);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_u8x16_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_u8
|
||||
#define vcltq_u8(a, b) simde_vcltq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vcltq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(
|
||||
_mm_cmpeq_epi16(b_.m128i, a_.m128i),
|
||||
_mm_cmpeq_epi16(_mm_max_epu16(b_.m128i, a_.m128i), b_.m128i)
|
||||
);
|
||||
#elif defined(SIMDE_X86_SSE2_NATIVE)
|
||||
__m128i sign_bits = _mm_set1_epi16(INT16_MIN);
|
||||
r_.m128i = _mm_cmplt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_u16x8_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_u16
|
||||
#define vcltq_u16(a, b) simde_vcltq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcltq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcltq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(
|
||||
_mm_cmpeq_epi32(b_.m128i, a_.m128i),
|
||||
_mm_cmpeq_epi32(_mm_max_epu32(b_.m128i, a_.m128i), b_.m128i)
|
||||
);
|
||||
#elif defined(SIMDE_X86_SSE2_NATIVE)
|
||||
__m128i sign_bits = _mm_set1_epi32(INT32_MIN);
|
||||
r_.m128i = _mm_cmplt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_u32x4_lt(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_u32
|
||||
#define vcltq_u32(a, b) simde_vcltq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcltq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltq_u64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b));
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
r_,
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_andnot_si128(
|
||||
_mm_cmpeq_epi64(b_.m128i, a_.m128i),
|
||||
_mm_cmpeq_epi64(_mm_max_epu64(b_.m128i, a_.m128i), b_.m128i)
|
||||
);
|
||||
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
__m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
|
||||
r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(b_.m128i, sign_bits), _mm_xor_si128(a_.m128i, sign_bits));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltq_u64
|
||||
#define vcltq_u64(a, b) simde_vcltq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclt_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_f32(a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_f32
|
||||
#define vclt_f32(a, b) simde_vclt_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vclt_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclt_f64(a, b);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_f64
|
||||
#define vclt_f64(a, b) simde_vclt_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vclt_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
simde_uint8x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpgt_pi8(b_.m64, a_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_s8
|
||||
#define vclt_s8(a, b) simde_vclt_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vclt_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
simde_uint16x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpgt_pi16(b_.m64, a_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_s16
|
||||
#define vclt_s16(a, b) simde_vclt_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclt_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_cmpgt_pi32(b_.m64, a_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_s32
|
||||
#define vclt_s32(a, b) simde_vclt_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vclt_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclt_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_s64
|
||||
#define vclt_s64(a, b) simde_vclt_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vclt_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
__m64 sign_bits = _mm_set1_pi8(INT8_MIN);
|
||||
r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_u8
|
||||
#define vclt_u8(a, b) simde_vclt_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vclt_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
__m64 sign_bits = _mm_set1_pi16(INT16_MIN);
|
||||
r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_u16
|
||||
#define vclt_u16(a, b) simde_vclt_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclt_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclt_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
__m64 sign_bits = _mm_set1_pi32(INT32_MIN);
|
||||
r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits));
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_u32
|
||||
#define vclt_u32(a, b) simde_vclt_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vclt_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vclt_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclt_u64
|
||||
#define vclt_u64(a, b) simde_vclt_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLT_H) */
|
||||
327
lib/simd_wrapper/simde/arm/neon/cltz.h
Normal file
327
lib/simd_wrapper/simde/arm/neon/cltz.h
Normal file
@@ -0,0 +1,327 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
/* TODO: float fallbacks should use vclt(a, vdup_n(0.0)) */
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLTZ_H)
|
||||
#define SIMDE_ARM_NEON_CLTZ_H
|
||||
|
||||
#include "types.h"
|
||||
#include "shr_n.h"
|
||||
#include "reinterpret.h"
|
||||
#include "clt.h"
|
||||
#include "dup_n.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcltzd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcltzd_s64(a));
|
||||
#else
|
||||
return (a < 0) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzd_s64
|
||||
#define vcltzd_s64(a) simde_vcltzd_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vcltzd_f64(simde_float64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint64_t, vcltzd_f64(a));
|
||||
#else
|
||||
return (a < SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzd_f64
|
||||
#define vcltzd_f64(a) simde_vcltzd_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vcltzs_f32(simde_float32_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return HEDLEY_STATIC_CAST(uint32_t, vcltzs_f32(a));
|
||||
#else
|
||||
return (a < SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzs_f32
|
||||
#define vcltzs_f32(a) simde_vcltzs_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcltz_f32(simde_float32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_f32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0)));
|
||||
#else
|
||||
simde_float32x2_private a_ = simde_float32x2_to_private(a);
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_f32
|
||||
#define vcltz_f32(a) simde_vcltz_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcltz_f64(simde_float64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_f64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0)));
|
||||
#else
|
||||
simde_float64x1_private a_ = simde_float64x1_to_private(a);
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_f64
|
||||
#define vcltz_f64(a) simde_vcltz_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vcltz_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_s8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_s8(a, simde_vdup_n_s8(0));
|
||||
#else
|
||||
return simde_vreinterpret_u8_s8(simde_vshr_n_s8(a, 7));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_s8
|
||||
#define vcltz_s8(a) simde_vcltz_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vcltz_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_s16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_s16(a, simde_vdup_n_s16(0));
|
||||
#else
|
||||
return simde_vreinterpret_u16_s16(simde_vshr_n_s16(a, 15));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_s16
|
||||
#define vcltz_s16(a) simde_vcltz_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcltz_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_s32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_s32(a, simde_vdup_n_s32(0));
|
||||
#else
|
||||
return simde_vreinterpret_u32_s32(simde_vshr_n_s32(a, 31));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_s32
|
||||
#define vcltz_s32(a) simde_vcltz_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcltz_s64(simde_int64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltz_s64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vclt_s64(a, simde_vdup_n_s64(0));
|
||||
#else
|
||||
return simde_vreinterpret_u64_s64(simde_vshr_n_s64(a, 63));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltz_s64
|
||||
#define vcltz_s64(a) simde_vcltz_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcltzq_f32(simde_float32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_f32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0)));
|
||||
#else
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_f32
|
||||
#define vcltzq_f32(a) simde_vcltzq_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcltzq_f64(simde_float64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_f64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0)));
|
||||
#else
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_f64
|
||||
#define vcltzq_f64(a) simde_vcltzq_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vcltzq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_s8(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_s8(a, simde_vdupq_n_s8(0));
|
||||
#else
|
||||
return simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(a, 7));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_s8
|
||||
#define vcltzq_s8(a) simde_vcltzq_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vcltzq_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_s16(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_s16(a, simde_vdupq_n_s16(0));
|
||||
#else
|
||||
return simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(a, 15));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_s16
|
||||
#define vcltzq_s16(a) simde_vcltzq_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcltzq_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_s32(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_s32(a, simde_vdupq_n_s32(0));
|
||||
#else
|
||||
return simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(a, 31));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_s32
|
||||
#define vcltzq_s32(a) simde_vcltzq_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcltzq_s64(simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcltzq_s64(a);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vcltq_s64(a, simde_vdupq_n_s64(0));
|
||||
#else
|
||||
return simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(a, 63));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcltzq_s64
|
||||
#define vcltzq_s64(a) simde_vcltzq_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLTZ_H) */
|
||||
427
lib/simd_wrapper/simde/arm/neon/clz.h
Normal file
427
lib/simd_wrapper/simde/arm/neon/clz.h
Normal file
@@ -0,0 +1,427 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CLZ_H)
|
||||
#define SIMDE_ARM_NEON_CLZ_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_x_vclzb_u8(uint8_t a) {
|
||||
#if \
|
||||
defined(SIMDE_BUILTIN_SUFFIX_8_) && \
|
||||
( \
|
||||
SIMDE_BUILTIN_HAS_8_(clz) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
|
||||
)
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(r);
|
||||
|
||||
return HEDLEY_STATIC_CAST(uint8_t, SIMDE_BUILTIN_8_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_8_, a)));
|
||||
#else
|
||||
uint8_t r;
|
||||
uint8_t shift;
|
||||
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(r);
|
||||
|
||||
r = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x0F)) << 2); a >>= r;
|
||||
shift = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x03)) << 1); a >>= shift; r |= shift;
|
||||
r |= (a >> 1);
|
||||
|
||||
return ((8 * sizeof(r)) - 1) - r;
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_x_vclzh_u16(uint16_t a) {
|
||||
#if \
|
||||
defined(SIMDE_BUILTIN_SUFFIX_16_) && \
|
||||
( \
|
||||
SIMDE_BUILTIN_HAS_16_(clz) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
|
||||
)
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(r);
|
||||
|
||||
return HEDLEY_STATIC_CAST(uint16_t, SIMDE_BUILTIN_16_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_16_, a)));
|
||||
#else
|
||||
uint16_t r;
|
||||
uint16_t shift;
|
||||
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(r);
|
||||
|
||||
r = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x00FF)) << 3); a >>= r;
|
||||
shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x000F)) << 2); a >>= shift; r |= shift;
|
||||
shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x0003)) << 1); a >>= shift; r |= shift;
|
||||
r |= (a >> 1);
|
||||
|
||||
return ((8 * sizeof(r)) - 1) - r;
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_x_vclzs_u32(uint32_t a) {
|
||||
#if \
|
||||
defined(SIMDE_BUILTIN_SUFFIX_32_) && \
|
||||
( \
|
||||
SIMDE_BUILTIN_HAS_32_(clz) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(13,1,0) \
|
||||
)
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(a);
|
||||
|
||||
return HEDLEY_STATIC_CAST(uint32_t, SIMDE_BUILTIN_32_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_32_, a)));
|
||||
#else
|
||||
uint32_t r;
|
||||
uint32_t shift;
|
||||
|
||||
if (HEDLEY_UNLIKELY(a == 0))
|
||||
return 8 * sizeof(a);
|
||||
|
||||
r = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0xFFFF)) << 4); a >>= r;
|
||||
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x00FF)) << 3); a >>= shift; r |= shift;
|
||||
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x000F)) << 2); a >>= shift; r |= shift;
|
||||
shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x0003)) << 1); a >>= shift; r |= shift;
|
||||
r |= (a >> 1);
|
||||
|
||||
return ((8 * sizeof(r)) - 1) - r;
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_x_vclzb_s8(int8_t a) {
|
||||
return HEDLEY_STATIC_CAST(int8_t, simde_x_vclzb_u8(HEDLEY_STATIC_CAST(uint8_t, a)));
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_x_vclzh_s16(int16_t a) {
|
||||
return HEDLEY_STATIC_CAST(int16_t, simde_x_vclzh_u16(HEDLEY_STATIC_CAST(uint16_t, a)));
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_x_vclzs_s32(int32_t a) {
|
||||
return HEDLEY_STATIC_CAST(int32_t, simde_x_vclzs_u32(HEDLEY_STATIC_CAST(uint32_t, a)));
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vclz_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_s8(a);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzb_s8(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_s8
|
||||
#define vclz_s8(a) simde_vclz_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vclz_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_s16(a);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzh_s16(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_s16
|
||||
#define vclz_s16(a) simde_vclz_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vclz_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_s32(a);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzs_s32(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_s32
|
||||
#define vclz_s32(a) simde_vclz_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vclz_u8(simde_uint8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzb_u8(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_u8
|
||||
#define vclz_u8(a) simde_vclz_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vclz_u16(simde_uint16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_u16(a);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzh_u16(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_u16
|
||||
#define vclz_u16(a) simde_vclz_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vclz_u32(simde_uint32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclz_u32(a);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzs_u32(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclz_u32
|
||||
#define vclz_u32(a) simde_vclz_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vclzq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_s8(a);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_GFNI_NATIVE)
|
||||
/* https://gist.github.com/animetosho/6cb732ccb5ecd86675ca0a442b3c0622 */
|
||||
a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
|
||||
a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i);
|
||||
r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzb_s8(a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_s8
|
||||
#define vclzq_s8(a) simde_vclzq_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vclzq_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_s16(a);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzh_s16(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_s16
|
||||
#define vclzq_s16(a) simde_vclzq_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vclzq_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_s32(a);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzs_s32(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_s32
|
||||
#define vclzq_s32(a) simde_vclzq_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vclzq_u8(simde_uint8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_u8(a);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_GFNI_NATIVE)
|
||||
a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
|
||||
a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i);
|
||||
r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzb_u8(a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_u8
|
||||
#define vclzq_u8(a) simde_vclzq_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vclzq_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_u16(a);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzh_u16(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_u16
|
||||
#define vclzq_u16(a) simde_vclzq_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vclzq_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vclzq_u32(a);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
r_;
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_vclzs_u32(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vclzq_u32
|
||||
#define vclzq_u32(a) simde_vclzq_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CLZ_H) */
|
||||
132
lib/simd_wrapper/simde/arm/neon/cmla.h
Normal file
132
lib/simd_wrapper/simde/arm/neon/cmla.h
Normal file
@@ -0,0 +1,132 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CMLA_H)
|
||||
#define SIMDE_ARM_NEON_CMLA_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vcmla_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmla_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_ = simde_float32x2_to_private(r),
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] += b_.values[i] * a_.values[i & 2];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmla_f32
|
||||
#define vcmla_f32(r, a, b) simde_vcmla_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vcmlaq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_ = simde_float32x4_to_private(r),
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] += b_.values[i] * a_.values[i & 2];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_f32
|
||||
#define vcmlaq_f32(r, a, b) simde_vcmlaq_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vcmlaq_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_f64(r, a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_ = simde_float64x2_to_private(r),
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] += b_.values[i] * a_.values[i & 2];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_f64
|
||||
#define vcmlaq_f64(r, a, b) simde_vcmlaq_f64(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */
|
||||
138
lib/simd_wrapper/simde/arm/neon/cmla_rot180.h
Normal file
138
lib/simd_wrapper/simde/arm/neon/cmla_rot180.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CMLA_ROT180_H)
|
||||
#define SIMDE_ARM_NEON_CMLA_ROT180_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vcmla_rot180_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmla_rot180_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_ = simde_float32x2_to_private(r),
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmla_rot180_f32
|
||||
#define vcmla_rot180_f32(r, a, b) simde_vcmla_rot180_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vcmlaq_rot180_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot180_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_ = simde_float32x4_to_private(r),
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, -b_.values, 0, 1, 2, 3);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot180_f32
|
||||
#define vcmlaq_rot180_f32(r, a, b) simde_vcmlaq_rot180_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vcmlaq_rot180_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot180_f64(r, a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_ = simde_float64x2_to_private(r),
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, -b_.values, 0, 1);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot180_f64
|
||||
#define vcmlaq_rot180_f64(r, a, b) simde_vcmlaq_rot180_f64(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_H) */
|
||||
138
lib/simd_wrapper/simde/arm/neon/cmla_rot270.h
Normal file
138
lib/simd_wrapper/simde/arm/neon/cmla_rot270.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_H)
|
||||
#define SIMDE_ARM_NEON_CMLA_ROT270_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vcmla_rot270_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmla_rot270_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_ = simde_float32x2_to_private(r),
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmla_rot270_f32
|
||||
#define vcmla_rot270_f32(r, a, b) simde_vcmla_rot270_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vcmlaq_rot270_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot270_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_ = simde_float32x4_to_private(r),
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot270_f32
|
||||
#define vcmlaq_rot270_f32(r, a, b) simde_vcmlaq_rot270_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vcmlaq_rot270_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot270_f64(r, a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_ = simde_float64x2_to_private(r),
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot270_f64
|
||||
#define vcmlaq_rot270_f64(r, a, b) simde_vcmlaq_rot270_f64(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) */
|
||||
138
lib/simd_wrapper/simde/arm/neon/cmla_rot90.h
Normal file
138
lib/simd_wrapper/simde/arm/neon/cmla_rot90.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_H)
|
||||
#define SIMDE_ARM_NEON_CMLA_ROT90_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vcmla_rot90_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmla_rot90_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_ = simde_float32x2_to_private(r),
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmla_rot90_f32
|
||||
#define vcmla_rot90_f32(r, a, b) simde_vcmla_rot90_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vcmlaq_rot90_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot90_f32(r, a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_ = simde_float32x4_to_private(r),
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot90_f32
|
||||
#define vcmlaq_rot90_f32(r, a, b) simde_vcmlaq_rot90_f32(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vcmlaq_rot90_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
|
||||
return vcmlaq_rot90_f64(r, a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_ = simde_float64x2_to_private(r),
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1);
|
||||
b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2);
|
||||
r_.values += b_.values * a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
|
||||
r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
|
||||
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcmlaq_rot90_f64
|
||||
#define vcmlaq_rot90_f64(r, a, b) simde_vcmlaq_rot90_f64(r, a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) */
|
||||
170
lib/simd_wrapper/simde/arm/neon/cnt.h
Normal file
170
lib/simd_wrapper/simde/arm/neon/cnt.h
Normal file
@@ -0,0 +1,170 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CNT_H)
|
||||
#define SIMDE_ARM_NEON_CNT_H
|
||||
|
||||
#include "types.h"
|
||||
#include "reinterpret.h"
|
||||
#include <limits.h>
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_x_arm_neon_cntb(uint8_t v) {
|
||||
v = v - ((v >> 1) & (85));
|
||||
v = (v & (51)) + ((v >> (2)) & (51));
|
||||
v = (v + (v >> (4))) & (15);
|
||||
return HEDLEY_STATIC_CAST(uint8_t, v) >> (sizeof(uint8_t) - 1) * CHAR_BIT;
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vcnt_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcnt_s8(a);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i])));
|
||||
}
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcnt_s8
|
||||
#define vcnt_s8(a) simde_vcnt_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vcnt_u8(simde_uint8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcnt_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_x_arm_neon_cntb(a_.values[i]);
|
||||
}
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcnt_u8
|
||||
#define vcnt_u8(a) simde_vcnt_u8((a))
|
||||
#endif
|
||||
|
||||
/* The x86 implementations are stolen from
|
||||
* https://github.com/WebAssembly/simd/pull/379. They could be cleaned
|
||||
* up a bit if someone is bored; they're mostly just direct
|
||||
* translations from the assembly. */
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vcntq_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcntq_s8(a);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a)));
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE)
|
||||
r_.m128i = _mm_popcnt_epi8(a_.m128i);
|
||||
#elif defined(SIMDE_X86_AVX2_NATIVE)
|
||||
__m128i tmp0 = _mm_set1_epi8(0x0f);
|
||||
__m128i tmp1 = _mm_andnot_si128(tmp0, a_.m128i);
|
||||
__m128i y = _mm_and_si128(tmp0, a_.m128i);
|
||||
tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
|
||||
tmp1 = _mm_srli_epi16(tmp1, 4);
|
||||
y = _mm_shuffle_epi8(tmp0, y);
|
||||
tmp1 = _mm_shuffle_epi8(tmp0, tmp1);
|
||||
r_.m128i = _mm_add_epi8(y, tmp1);
|
||||
#elif defined(SIMDE_X86_SSSE3_NATIVE)
|
||||
__m128i tmp0 = _mm_set1_epi8(0x0f);
|
||||
__m128i tmp1 = a_.m128i;
|
||||
tmp1 = _mm_and_si128(tmp1, tmp0);
|
||||
tmp0 = _mm_andnot_si128(tmp0, a_.m128i);
|
||||
__m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
|
||||
tmp0 = _mm_srli_epi16(tmp0, 4);
|
||||
y = _mm_shuffle_epi8(y, tmp1);
|
||||
tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
|
||||
tmp1 = _mm_shuffle_epi8(tmp1, tmp0);
|
||||
r_.m128i = _mm_add_epi8(y, tmp1);
|
||||
#elif defined(SIMDE_X86_SSE2_NATIVE)
|
||||
__m128i tmp = _mm_and_si128(_mm_srli_epi16(a_.m128i, 1), _mm_set1_epi8(0x55));
|
||||
a_.m128i = _mm_sub_epi8(a_.m128i, tmp);
|
||||
tmp = a_.m128i;
|
||||
a_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x33));
|
||||
tmp = _mm_and_si128(_mm_srli_epi16(tmp, 2), _mm_set1_epi8(0x33));
|
||||
a_.m128i = _mm_add_epi8(a_.m128i, tmp);
|
||||
tmp = _mm_srli_epi16(a_.m128i, 4);
|
||||
a_.m128i = _mm_add_epi8(a_.m128i, tmp);
|
||||
r_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x0f));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i])));
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcntq_s8
|
||||
#define vcntq_s8(a) simde_vcntq_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vcntq_u8(simde_uint8x16_t a) {
|
||||
return simde_vreinterpretq_u8_s8(simde_vcntq_s8(simde_vreinterpretq_s8_u8(a)));
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcntq_u8
|
||||
#define vcntq_u8(a) simde_vcntq_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CNT_H) */
|
||||
343
lib/simd_wrapper/simde/arm/neon/combine.h
Normal file
343
lib/simd_wrapper/simde/arm/neon/combine.h
Normal file
@@ -0,0 +1,343 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_COMBINE_H)
|
||||
#define SIMDE_ARM_NEON_COMBINE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_f32(low, high);
|
||||
#else
|
||||
simde_float32x4_private r_;
|
||||
simde_float32x2_private
|
||||
low_ = simde_float32x2_to_private(low),
|
||||
high_ = simde_float32x2_to_private(high);
|
||||
|
||||
/* Note: __builtin_shufflevector can have a the output contain
|
||||
* twice the number of elements, __builtin_shuffle cannot.
|
||||
* Using SIMDE_SHUFFLE_VECTOR_ here would not work. */
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_f32
|
||||
#define vcombine_f32(low, high) simde_vcombine_f32((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcombine_f64(low, high);
|
||||
#else
|
||||
simde_float64x2_private r_;
|
||||
simde_float64x1_private
|
||||
low_ = simde_float64x1_to_private(low),
|
||||
high_ = simde_float64x1_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_f64
|
||||
#define vcombine_f64(low, high) simde_vcombine_f64((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_s8(low, high);
|
||||
#else
|
||||
simde_int8x16_private r_;
|
||||
simde_int8x8_private
|
||||
low_ = simde_int8x8_to_private(low),
|
||||
high_ = simde_int8x8_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_s8
|
||||
#define vcombine_s8(low, high) simde_vcombine_s8((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_s16(low, high);
|
||||
#else
|
||||
simde_int16x8_private r_;
|
||||
simde_int16x4_private
|
||||
low_ = simde_int16x4_to_private(low),
|
||||
high_ = simde_int16x4_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_s16
|
||||
#define vcombine_s16(low, high) simde_vcombine_s16((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_s32(low, high);
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
simde_int32x2_private
|
||||
low_ = simde_int32x2_to_private(low),
|
||||
high_ = simde_int32x2_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_s32
|
||||
#define vcombine_s32(low, high) simde_vcombine_s32((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_s64(low, high);
|
||||
#else
|
||||
simde_int64x2_private r_;
|
||||
simde_int64x1_private
|
||||
low_ = simde_int64x1_to_private(low),
|
||||
high_ = simde_int64x1_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_s64
|
||||
#define vcombine_s64(low, high) simde_vcombine_s64((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_u8(low, high);
|
||||
#else
|
||||
simde_uint8x16_private r_;
|
||||
simde_uint8x8_private
|
||||
low_ = simde_uint8x8_to_private(low),
|
||||
high_ = simde_uint8x8_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_u8
|
||||
#define vcombine_u8(low, high) simde_vcombine_u8((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_u16(low, high);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint16x4_private
|
||||
low_ = simde_uint16x4_to_private(low),
|
||||
high_ = simde_uint16x4_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_u16
|
||||
#define vcombine_u16(low, high) simde_vcombine_u16((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_u32(low, high);
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint32x2_private
|
||||
low_ = simde_uint32x2_to_private(low),
|
||||
high_ = simde_uint32x2_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_u32
|
||||
#define vcombine_u32(low, high) simde_vcombine_u32((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcombine_u64(low, high);
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint64x1_private
|
||||
low_ = simde_uint64x1_to_private(low),
|
||||
high_ = simde_uint64x1_to_private(high);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
|
||||
#else
|
||||
size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < halfway ; i++) {
|
||||
r_.values[i] = low_.values[i];
|
||||
r_.values[i + halfway] = high_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcombine_u64
|
||||
#define vcombine_u64(low, high) simde_vcombine_u64((low), (high))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */
|
||||
186
lib/simd_wrapper/simde/arm/neon/create.h
Normal file
186
lib/simd_wrapper/simde/arm/neon/create.h
Normal file
@@ -0,0 +1,186 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
/* N.B. CM: vcreate_f16 and vcreate_bf16 are omitted as
|
||||
* SIMDe has no 16-bit floating point support.
|
||||
* Idem for the poly types. */
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_CREATE_H)
|
||||
#define SIMDE_ARM_NEON_CREATE_H
|
||||
|
||||
#include "dup_n.h"
|
||||
#include "reinterpret.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vcreate_s8(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_s8(a);
|
||||
#else
|
||||
return simde_vreinterpret_s8_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_s8
|
||||
#define vcreate_s8(a) simde_vcreate_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vcreate_s16(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_s16(a);
|
||||
#else
|
||||
return simde_vreinterpret_s16_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_s16
|
||||
#define vcreate_s16(a) simde_vcreate_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vcreate_s32(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_s32(a);
|
||||
#else
|
||||
return simde_vreinterpret_s32_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_s32
|
||||
#define vcreate_s32(a) simde_vcreate_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vcreate_s64(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_s64(a);
|
||||
#else
|
||||
return simde_vreinterpret_s64_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_s64
|
||||
#define vcreate_s64(a) simde_vcreate_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vcreate_u8(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_u8(a);
|
||||
#else
|
||||
return simde_vreinterpret_u8_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_u8
|
||||
#define vcreate_u8(a) simde_vcreate_u8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vcreate_u16(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_u16(a);
|
||||
#else
|
||||
return simde_vreinterpret_u16_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_u16
|
||||
#define vcreate_u16(a) simde_vcreate_u16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vcreate_u32(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_u32(a);
|
||||
#else
|
||||
return simde_vreinterpret_u32_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_u32
|
||||
#define vcreate_u32(a) simde_vcreate_u32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vcreate_u64(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_u64(a);
|
||||
#else
|
||||
return simde_vdup_n_u64(a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_u64
|
||||
#define vcreate_u64(a) simde_vcreate_u64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vcreate_f32(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vcreate_f32(a);
|
||||
#else
|
||||
return simde_vreinterpret_f32_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_f32
|
||||
#define vcreate_f32(a) simde_vcreate_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vcreate_f64(uint64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vcreate_f64(a);
|
||||
#else
|
||||
return simde_vreinterpret_f64_u64(simde_vdup_n_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vcreate_f64
|
||||
#define vcreate_f64(a) simde_vcreate_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CREATE_H) */
|
||||
1175
lib/simd_wrapper/simde/arm/neon/cvt.h
Normal file
1175
lib/simd_wrapper/simde/arm/neon/cvt.h
Normal file
File diff suppressed because it is too large
Load Diff
171
lib/simd_wrapper/simde/arm/neon/dot.h
Normal file
171
lib/simd_wrapper/simde/arm/neon/dot.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_DOT_H)
|
||||
#define SIMDE_ARM_NEON_DOT_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "add.h"
|
||||
#include "combine.h"
|
||||
#include "dup_n.h"
|
||||
#include "get_low.h"
|
||||
#include "get_high.h"
|
||||
#include "paddl.h"
|
||||
#include "movn.h"
|
||||
#include "mull.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vdot_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
return vdot_s32(r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return simde_vadd_s32(r, simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(a, b)))));
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]);
|
||||
}
|
||||
r_.values[i] = acc;
|
||||
}
|
||||
return simde_vadd_s32(r, simde_int32x2_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_s32
|
||||
#define vdot_s32(r, a, b) simde_vdot_s32((r), (a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vdot_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
return vdot_u32(r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return simde_vadd_u32(r, simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(a, b)))));
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]);
|
||||
}
|
||||
r_.values[i] = acc;
|
||||
}
|
||||
return simde_vadd_u32(r, simde_uint32x2_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_u32
|
||||
#define vdot_u32(r, a, b) simde_vdot_u32((r), (a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vdotq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
return vdotq_s32(r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return simde_vaddq_s32(r,
|
||||
simde_vcombine_s32(simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_low_s8(a), simde_vget_low_s8(b))))),
|
||||
simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_high_s8(a), simde_vget_high_s8(b)))))));
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
for (int i = 0 ; i < 4 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]);
|
||||
}
|
||||
r_.values[i] = acc;
|
||||
}
|
||||
return simde_vaddq_s32(r, simde_int32x4_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_s32
|
||||
#define vdotq_s32(r, a, b) simde_vdotq_s32((r), (a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vdotq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
return vdotq_u32(r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return simde_vaddq_u32(r,
|
||||
simde_vcombine_u32(simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_low_u8(a), simde_vget_low_u8(b))))),
|
||||
simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_high_u8(a), simde_vget_high_u8(b)))))));
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
for (int i = 0 ; i < 4 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]);
|
||||
}
|
||||
r_.values[i] = acc;
|
||||
}
|
||||
return simde_vaddq_u32(r, simde_uint32x4_from_private(r_));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_u32
|
||||
#define vdotq_u32(r, a, b) simde_vdotq_u32((r), (a), (b))
|
||||
#endif
|
||||
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_DOT_H) */
|
||||
491
lib/simd_wrapper/simde/arm/neon/dot_lane.h
Normal file
491
lib/simd_wrapper/simde/arm/neon/dot_lane.h
Normal file
@@ -0,0 +1,491 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_DOT_LANE_H)
|
||||
#define SIMDE_ARM_NEON_DOT_LANE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "add.h"
|
||||
#include "dup_lane.h"
|
||||
#include "paddl.h"
|
||||
#include "movn.h"
|
||||
#include "mull.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vdot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int32x2_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_2_(vdot_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int32x2_t
|
||||
b_lane,
|
||||
b_32 = vreinterpret_s32_s8(b);
|
||||
|
||||
SIMDE_CONSTIFY_2_(vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
result =
|
||||
vadd_s32(
|
||||
r,
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(a, vreinterpret_s8_s32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_int32x2_private r_ = simde_int32x2_to_private(r);
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_lane_s32
|
||||
#define vdot_lane_s32(r, a, b, lane) simde_vdot_lane_s32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vdot_lane_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint32x2_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_2_(vdot_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint32x2_t
|
||||
b_lane,
|
||||
b_32 = vreinterpret_u32_u8(b);
|
||||
|
||||
SIMDE_CONSTIFY_2_(vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
result =
|
||||
vadd_u32(
|
||||
r,
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(a, vreinterpret_u8_u32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_uint32x2_private r_ = simde_uint32x2_to_private(r);
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_lane_u32
|
||||
#define vdot_lane_u32(r, a, b, lane) simde_vdot_lane_u32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vdot_laneq_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x16_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int32x2_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_4_(vdot_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int32x2_t b_lane;
|
||||
simde_int32x4_t b_32 = vreinterpretq_s32_s8(b);
|
||||
|
||||
SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
result =
|
||||
vadd_s32(
|
||||
r,
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(a, vreinterpret_s8_s32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_int32x2_private r_ = simde_int32x2_to_private(r);
|
||||
simde_int8x8_private a_ = simde_int8x8_to_private(a);
|
||||
simde_int8x16_private b_ = simde_int8x16_to_private(b);
|
||||
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_laneq_s32
|
||||
#define vdot_laneq_s32(r, a, b, lane) simde_vdot_laneq_s32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vdot_laneq_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x16_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint32x2_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_4_(vdot_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint32x2_t b_lane;
|
||||
simde_uint32x4_t b_32 = vreinterpretq_u32_u8(b);
|
||||
|
||||
SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
result =
|
||||
vadd_u32(
|
||||
r,
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(a, vreinterpret_u8_u32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_uint32x2_private r_ = simde_uint32x2_to_private(r);
|
||||
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
|
||||
simde_uint8x16_private b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
for (int i = 0 ; i < 2 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for (int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdot_laneq_u32
|
||||
#define vdot_laneq_u32(r, a, b, lane) simde_vdot_laneq_u32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vdotq_laneq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint32x4_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_4_(vdotq_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint32x4_t
|
||||
b_lane,
|
||||
b_32 = vreinterpretq_u32_u8(b);
|
||||
SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
|
||||
result =
|
||||
vcombine_u32(
|
||||
vadd_u32(
|
||||
vget_low_u32(r),
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(vget_low_u8(a), vget_low_u8(vreinterpretq_u8_u32(b_lane)))
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
vadd_u32(
|
||||
vget_high_u32(r),
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(vget_high_u8(a), vget_high_u8(vreinterpretq_u8_u32(b_lane)))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_uint32x4_private r_ = simde_uint32x4_to_private(r);
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
for(int i = 0 ; i < 4 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for(int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_laneq_u32
|
||||
#define vdotq_laneq_u32(r, a, b, lane) simde_vdotq_laneq_u32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vdotq_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int32x4_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_4_(vdotq_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int32x4_t
|
||||
b_lane,
|
||||
b_32 = vreinterpretq_s32_s8(b);
|
||||
SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
|
||||
result =
|
||||
vcombine_s32(
|
||||
vadd_s32(
|
||||
vget_low_s32(r),
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(vget_low_s8(a), vget_low_s8(vreinterpretq_s8_s32(b_lane)))
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
vadd_s32(
|
||||
vget_high_s32(r),
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(vget_high_s8(a), vget_high_s8(vreinterpretq_s8_s32(b_lane)))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_int32x4_private r_ = simde_int32x4_to_private(r);
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
for(int i = 0 ; i < 4 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for(int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_laneq_s32
|
||||
#define vdotq_laneq_s32(r, a, b, lane) simde_vdotq_laneq_s32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vdotq_lane_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x8_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint32x4_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_2_(vdotq_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint32x2_t
|
||||
b_lane,
|
||||
b_32 = vreinterpret_u32_u8(b);
|
||||
SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
|
||||
result =
|
||||
vcombine_u32(
|
||||
vadd_u32(
|
||||
vget_low_u32(r),
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(vget_low_u8(a), vreinterpret_u8_u32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
vadd_u32(
|
||||
vget_high_u32(r),
|
||||
vmovn_u64(
|
||||
vpaddlq_u32(
|
||||
vpaddlq_u16(
|
||||
vmull_u8(vget_high_u8(a), vreinterpret_u8_u32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_uint32x4_private r_ = simde_uint32x4_to_private(r);
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
simde_uint8x8_private b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
for(int i = 0 ; i < 4 ; i++) {
|
||||
uint32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for(int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_lane_u32
|
||||
#define vdotq_lane_u32(r, a, b, lane) simde_vdotq_lane_u32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vdotq_lane_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x8_t b, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int32x4_t result;
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD)
|
||||
SIMDE_CONSTIFY_2_(vdotq_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int32x2_t
|
||||
b_lane,
|
||||
b_32 = vreinterpret_s32_s8(b);
|
||||
SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32);
|
||||
|
||||
result =
|
||||
vcombine_s32(
|
||||
vadd_s32(
|
||||
vget_low_s32(r),
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(vget_low_s8(a), vreinterpret_s8_s32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
vadd_s32(
|
||||
vget_high_s32(r),
|
||||
vmovn_s64(
|
||||
vpaddlq_s32(
|
||||
vpaddlq_s16(
|
||||
vmull_s8(vget_high_s8(a), vreinterpret_s8_s32(b_lane))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
#else
|
||||
simde_int32x4_private r_ = simde_int32x4_to_private(r);
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
simde_int8x8_private b_ = simde_int8x8_to_private(b);
|
||||
|
||||
for(int i = 0 ; i < 4 ; i++) {
|
||||
int32_t acc = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(+:acc)
|
||||
for(int j = 0 ; j < 4 ; j++) {
|
||||
const int idx_b = j + (lane << 2);
|
||||
const int idx_a = j + (i << 2);
|
||||
acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]);
|
||||
}
|
||||
r_.values[i] += acc;
|
||||
}
|
||||
|
||||
result = simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD))
|
||||
#undef vdotq_lane_s32
|
||||
#define vdotq_lane_s32(r, a, b, lane) simde_vdotq_lane_s32((r), (a), (b), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_DOT_LANE_H) */
|
||||
1200
lib/simd_wrapper/simde/arm/neon/dup_lane.h
Normal file
1200
lib/simd_wrapper/simde/arm/neon/dup_lane.h
Normal file
File diff suppressed because it is too large
Load Diff
674
lib/simd_wrapper/simde/arm/neon/dup_n.h
Normal file
674
lib/simd_wrapper/simde/arm/neon/dup_n.h
Normal file
@@ -0,0 +1,674 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_DUP_N_H)
|
||||
#define SIMDE_ARM_NEON_DUP_N_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x4_t
|
||||
simde_vdup_n_f16(simde_float16 value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vdup_n_f16(value);
|
||||
#else
|
||||
simde_float16x4_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_float16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_f16 simde_vdup_n_f16
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_f16
|
||||
#define vdup_n_f16(value) simde_vdup_n_f16((value))
|
||||
#undef vmov_n_f16
|
||||
#define vmov_n_f16(value) simde_vmov_n_f16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vdup_n_f32(float value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_f32(value);
|
||||
#else
|
||||
simde_float32x2_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_f32 simde_vdup_n_f32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_f32
|
||||
#define vdup_n_f32(value) simde_vdup_n_f32((value))
|
||||
#undef vmov_n_f32
|
||||
#define vmov_n_f32(value) simde_vmov_n_f32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vdup_n_f64(double value) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vdup_n_f64(value);
|
||||
#else
|
||||
simde_float64x1_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_f64 simde_vdup_n_f64
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_f64
|
||||
#define vdup_n_f64(value) simde_vdup_n_f64((value))
|
||||
#undef vmov_n_f64
|
||||
#define vmov_n_f64(value) simde_vmov_n_f64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vdup_n_s8(int8_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_s8(value);
|
||||
#else
|
||||
simde_int8x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi8(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_s8 simde_vdup_n_s8
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_s8
|
||||
#define vdup_n_s8(value) simde_vdup_n_s8((value))
|
||||
#undef vmov_n_s8
|
||||
#define vmov_n_s8(value) simde_vmov_n_s8((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vdup_n_s16(int16_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_s16(value);
|
||||
#else
|
||||
simde_int16x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi16(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_s16 simde_vdup_n_s16
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_s16
|
||||
#define vdup_n_s16(value) simde_vdup_n_s16((value))
|
||||
#undef vmov_n_s16
|
||||
#define vmov_n_s16(value) simde_vmov_n_s16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vdup_n_s32(int32_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_s32(value);
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi32(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_s32 simde_vdup_n_s32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_s32
|
||||
#define vdup_n_s32(value) simde_vdup_n_s32((value))
|
||||
#undef vmov_n_s32
|
||||
#define vmov_n_s32(value) simde_vmov_n_s32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vdup_n_s64(int64_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_s64(value);
|
||||
#else
|
||||
simde_int64x1_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_s64 simde_vdup_n_s64
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_s64
|
||||
#define vdup_n_s64(value) simde_vdup_n_s64((value))
|
||||
#undef vmov_n_s64
|
||||
#define vmov_n_s64(value) simde_vmov_n_s64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vdup_n_u8(uint8_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_u8(value);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi8(HEDLEY_STATIC_CAST(int8_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_u8 simde_vdup_n_u8
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_u8
|
||||
#define vdup_n_u8(value) simde_vdup_n_u8((value))
|
||||
#undef vmov_n_u8
|
||||
#define vmov_n_u8(value) simde_vmov_n_u8((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vdup_n_u16(uint16_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_u16(value);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_u16 simde_vdup_n_u16
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_u16
|
||||
#define vdup_n_u16(value) simde_vdup_n_u16((value))
|
||||
#undef vmov_n_u16
|
||||
#define vmov_n_u16(value) simde_vmov_n_u16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vdup_n_u32(uint32_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_u32(value);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_u32 simde_vdup_n_u32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_u32
|
||||
#define vdup_n_u32(value) simde_vdup_n_u32((value))
|
||||
#undef vmov_n_u32
|
||||
#define vmov_n_u32(value) simde_vmov_n_u32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vdup_n_u64(uint64_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdup_n_u64(value);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmov_n_u64 simde_vdup_n_u64
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdup_n_u64
|
||||
#define vdup_n_u64(value) simde_vdup_n_u64((value))
|
||||
#undef vmov_n_u64
|
||||
#define vmov_n_u64(value) simde_vmov_n_u64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x8_t
|
||||
simde_vdupq_n_f16(simde_float16 value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vdupq_n_f16(value);
|
||||
#else
|
||||
simde_float16x8_private r_;
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
|
||||
return simde_float16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_f32 simde_vdupq_n_f32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_f16
|
||||
#define vdupq_n_f16(value) simde_vdupq_n_f16((value))
|
||||
#undef vmovq_n_f16
|
||||
#define vmovq_n_f16(value) simde_vmovq_n_f16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vdupq_n_f32(float value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_f32(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
|
||||
(void) value;
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_float32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE_NATIVE)
|
||||
r_.m128 = _mm_set1_ps(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_f32 simde_vdupq_n_f32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_f32
|
||||
#define vdupq_n_f32(value) simde_vdupq_n_f32((value))
|
||||
#undef vmovq_n_f32
|
||||
#define vmovq_n_f32(value) simde_vmovq_n_f32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vdupq_n_f64(double value) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vdupq_n_f64(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
(void) value;
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_float64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128d = _mm_set1_pd(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f64x2_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_f64 simde_vdupq_n_f64
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_f64
|
||||
#define vdupq_n_f64(value) simde_vdupq_n_f64((value))
|
||||
#undef vmovq_n_f64
|
||||
#define vmovq_n_f64(value) simde_vmovq_n_f64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vdupq_n_s8(int8_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_s8(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_int8x16_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi8(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_s8 simde_vdupq_n_s8
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_s8
|
||||
#define vdupq_n_s8(value) simde_vdupq_n_s8((value))
|
||||
#undef vmovq_n_s8
|
||||
#define vmovq_n_s8(value) simde_vmovq_n_s8((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vdupq_n_s16(int16_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_s16(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_int16x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi16(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_s16 simde_vdupq_n_s16
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_s16
|
||||
#define vdupq_n_s16(value) simde_vdupq_n_s16((value))
|
||||
#undef vmovq_n_s16
|
||||
#define vmovq_n_s16(value) simde_vmovq_n_s16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vdupq_n_s32(int32_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_s32(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi32(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i32x4_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_s32 simde_vdupq_n_s32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_s32
|
||||
#define vdupq_n_s32(value) simde_vdupq_n_s32((value))
|
||||
#undef vmovq_n_s32
|
||||
#define vmovq_n_s32(value) simde_vmovq_n_s32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vdupq_n_s64(int64_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_s64(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(HEDLEY_STATIC_CAST(signed long long, value));
|
||||
#else
|
||||
simde_int64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
|
||||
r_.m128i = _mm_set1_epi64x(value);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i64x2_splat(value);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_s64 simde_vdupq_n_s64
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_s64
|
||||
#define vdupq_n_s64(value) simde_vdupq_n_s64((value))
|
||||
#undef vmovq_n_s64
|
||||
#define vmovq_n_s64(value) simde_vmovq_n_s64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vdupq_n_u8(uint8_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_u8(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_uint8x16_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));
|
||||
#elif defined (SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_u8 simde_vdupq_n_u8
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_u8
|
||||
#define vdupq_n_u8(value) simde_vdupq_n_u8((value))
|
||||
#undef vmovq_n_u8
|
||||
#define vmovq_n_u8(value) simde_vmovq_n_u8((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vdupq_n_u16(uint16_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_u16(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));
|
||||
#elif defined (SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_u16 simde_vdupq_n_u16
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_u16
|
||||
#define vdupq_n_u16(value) simde_vdupq_n_u16((value))
|
||||
#undef vmovq_n_u16
|
||||
#define vmovq_n_u16(value) simde_vmovq_n_u16((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vdupq_n_u32(uint32_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_u32(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(value);
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));
|
||||
#elif defined (SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_u32 simde_vdupq_n_u32
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_u32
|
||||
#define vdupq_n_u32(value) simde_vdupq_n_u32((value))
|
||||
#undef vmovq_n_u32
|
||||
#define vmovq_n_u32(value) simde_vmovq_n_u32((value))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vdupq_n_u64(uint64_t value) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vdupq_n_u64(value);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value));
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
|
||||
r_.m128i = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value));
|
||||
#elif defined (SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, value));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = value;
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#define simde_vmovq_n_u64 simde_vdupq_n_u64
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vdupq_n_u64
|
||||
#define vdupq_n_u64(value) simde_vdupq_n_u64((value))
|
||||
#undef vmovq_n_u64
|
||||
#define vmovq_n_u64(value) simde_vmovq_n_u64((value))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_DUP_N_H) */
|
||||
552
lib/simd_wrapper/simde/arm/neon/eor.h
Normal file
552
lib/simd_wrapper/simde/arm/neon/eor.h
Normal file
@@ -0,0 +1,552 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_EOR_H)
|
||||
#define SIMDE_ARM_NEON_EOR_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_veor_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_s8
|
||||
#define veor_s8(a, b) simde_veor_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_veor_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_s16
|
||||
#define veor_s16(a, b) simde_veor_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_veor_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_s32
|
||||
#define veor_s32(a, b) simde_veor_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_veor_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_s64
|
||||
#define veor_s64(a, b) simde_veor_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_veor_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_u8
|
||||
#define veor_u8(a, b) simde_veor_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_veor_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_u16
|
||||
#define veor_u16(a, b) simde_veor_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_veor_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_u32
|
||||
#define veor_u32(a, b) simde_veor_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_veor_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veor_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_xor_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veor_u64
|
||||
#define veor_u64(a, b) simde_veor_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_veorq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veorq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_xor(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veorq_s8
|
||||
#define veorq_s8(a, b) simde_veorq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_veorq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veorq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_xor(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veorq_s16
|
||||
#define veorq_s16(a, b) simde_veorq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_veorq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return veorq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_xor(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values ^ b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] ^ b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef veorq_s32
|
||||
#define veorq_s32(a, b) simde_veorq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
/* simde_veorq_s64: bitwise XOR of two 128-bit vectors of two int64 lanes
 * (portable equivalent of the NEON veorq_s64 intrinsic).
 * AltiVec path requires P7 (vec_xor on 64-bit lanes is not available on P6). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_veorq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_s64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_xor(a, b);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);

    /* Portable fallback: best available 128-bit XOR, or a scalar loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_s64
  #define veorq_s64(a, b) simde_veorq_s64((a), (b))
#endif
|
||||
|
||||
/* simde_veorq_u8: bitwise XOR of two 128-bit vectors of sixteen uint8 lanes
 * (portable equivalent of the NEON veorq_u8 intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_veorq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    /* Portable fallback: best available 128-bit XOR, or a scalar loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u8
  #define veorq_u8(a, b) simde_veorq_u8((a), (b))
#endif
|
||||
|
||||
/* simde_veorq_u16: bitwise XOR of two 128-bit vectors of eight uint16 lanes
 * (portable equivalent of the NEON veorq_u16 intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_veorq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    /* Portable fallback: best available 128-bit XOR, or a scalar loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u16
  #define veorq_u16(a, b) simde_veorq_u16((a), (b))
#endif
|
||||
|
||||
/* simde_veorq_u32: bitwise XOR of two 128-bit vectors of four uint32 lanes
 * (portable equivalent of the NEON veorq_u32 intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_veorq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    /* Portable fallback: best available 128-bit XOR, or a scalar loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u32
  #define veorq_u32(a, b) simde_veorq_u32((a), (b))
#endif
|
||||
|
||||
/* simde_veorq_u64: bitwise XOR of two 128-bit vectors of two uint64 lanes
 * (portable equivalent of the NEON veorq_u64 intrinsic).
 * AltiVec path requires P7 (64-bit-lane vec_xor is not available on P6). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_veorq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_xor(a, b);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    /* Portable fallback: best available 128-bit XOR, or a scalar loop. */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_xor(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u64
  #define veorq_u64(a, b) simde_veorq_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_EOR_H) */
|
||||
796
lib/simd_wrapper/simde/arm/neon/ext.h
Normal file
796
lib/simd_wrapper/simde/arm/neon/ext.h
Normal file
@@ -0,0 +1,796 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_EXT_H)
|
||||
#define SIMDE_ARM_NEON_EXT_H
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* simde_vext_f32: NEON vext_f32 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 1]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_float32x2_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_float32x2_from_private(r_);
  #endif
}
/* Macro overrides used when `n` is visible as a constant: SSSE3 byte-align or
 * a compiler shuffle, both cheaper than the scalar loop above. */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_f32(a, b, n) simde_float32x2_from_m64(_mm_alignr_pi8(simde_float32x2_to_m64(b), simde_float32x2_to_m64(a), n * sizeof(simde_float32)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_f32(a, b, n) (__extension__ ({ \
      simde_float32x2_private simde_vext_f32_r_; \
      simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
      simde_float32x2_from_private(simde_vext_f32_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_f32
  #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_f64: NEON vext_f64 — single-lane vector, so n can only be 0 and
 * the result is always a. (vext_f64 exists only on AArch64.) */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    (void) n;
    return vext_f64(a, b, 0);
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* `src & 0` is always 0 — kept for symmetry with the multi-lane variants. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_float64x1_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_f64(a, b, n) simde_float64x1_from_m64(_mm_alignr_pi8(simde_float64x1_to_m64(b), simde_float64x1_to_m64(a), n * sizeof(simde_float64)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_f64(a, b, n) (__extension__ ({ \
      simde_float64x1_private simde_vext_f64_r_; \
      simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, (n))); \
      simde_float64x1_from_private(simde_vext_f64_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on (AArch64 alias set). */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vext_f64
  #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_s8: NEON vext_s8 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 7]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int8x8_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_int8x8_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_s8(a, b, n) simde_int8x8_from_m64(_mm_alignr_pi8(simde_int8x8_to_m64(b), simde_int8x8_to_m64(a), n * sizeof(int8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_s8(a, b, n) (__extension__ ({ \
      simde_int8x8_private simde_vext_s8_r_; \
      simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
      simde_int8x8_from_private(simde_vext_s8_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s8
  #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_s16: NEON vext_s16 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 3]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int16x4_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_int16x4_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_s16(a, b, n) simde_int16x4_from_m64(_mm_alignr_pi8(simde_int16x4_to_m64(b), simde_int16x4_to_m64(a), n * sizeof(int16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_s16(a, b, n) (__extension__ ({ \
      simde_int16x4_private simde_vext_s16_r_; \
      simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
      simde_int16x4_from_private(simde_vext_s16_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s16
  #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_s32: NEON vext_s32 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 1]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int32x2_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_int32x2_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_s32(a, b, n) simde_int32x2_from_m64(_mm_alignr_pi8(simde_int32x2_to_m64(b), simde_int32x2_to_m64(a), n * sizeof(int32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_s32(a, b, n) (__extension__ ({ \
      simde_int32x2_private simde_vext_s32_r_; \
      simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
      simde_int32x2_from_private(simde_vext_s32_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s32
  #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_s64: NEON vext_s64 — single-lane vector, so n can only be 0 and
 * the result is always a. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    (void) n;
    return vext_s64(a, b, 0);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* `src & 0` is always 0 — kept for symmetry with the multi-lane variants. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_int64x1_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_s64(a, b, n) simde_int64x1_from_m64(_mm_alignr_pi8(simde_int64x1_to_m64(b), simde_int64x1_to_m64(a), n * sizeof(int64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_s64(a, b, n) (__extension__ ({ \
      simde_int64x1_private simde_vext_s64_r_; \
      simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \
      simde_int64x1_from_private(simde_vext_s64_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s64
  #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_u8: NEON vext_u8 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 7]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint8x8_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint8x8_private
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_uint8x8_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_u8(a, b, n) simde_uint8x8_from_m64(_mm_alignr_pi8(simde_uint8x8_to_m64(b), simde_uint8x8_to_m64(a), n * sizeof(uint8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_u8(a, b, n) (__extension__ ({ \
      simde_uint8x8_private simde_vext_u8_r_; \
      simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
      simde_uint8x8_from_private(simde_vext_u8_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u8
  #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_u16: NEON vext_u16 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 3]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint16x4_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint16x4_private
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_uint16x4_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_u16(a, b, n) simde_uint16x4_from_m64(_mm_alignr_pi8(simde_uint16x4_to_m64(b), simde_uint16x4_to_m64(a), n * sizeof(uint16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_u16(a, b, n) (__extension__ ({ \
      simde_uint16x4_private simde_vext_u16_r_; \
      simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
      simde_uint16x4_from_private(simde_vext_u16_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u16
  #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_u32: NEON vext_u32 — result lane i is lane (i + n) of the virtual
 * concatenation a:b. `n` must be a compile-time constant in [0, 1]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint32x2_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint32x2_private
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_uint32x2_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_u32(a, b, n) simde_uint32x2_from_m64(_mm_alignr_pi8(simde_uint32x2_to_m64(b), simde_uint32x2_to_m64(a), n * sizeof(uint32_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760)
  #define simde_vext_u32(a, b, n) (__extension__ ({ \
      simde_uint32x2_private simde_vext_u32_r_; \
      simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
      simde_uint32x2_from_private(simde_vext_u32_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u32
  #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vext_u64: NEON vext_u64 — single-lane vector, so n can only be 0 and
 * the result is always a. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    (void) n;
    return vext_u64(a, b, 0);
  #else
    simde_uint64x1_private
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* `src & 0` is always 0 — kept for symmetry with the multi-lane variants. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_uint64x1_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vext_u64(a, b, n) simde_uint64x1_from_m64(_mm_alignr_pi8(simde_uint64x1_to_m64(b), simde_uint64x1_to_m64(a), n * sizeof(uint64_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u64(a, b, n) (__extension__ ({ \
      simde_uint64x1_private simde_vext_u64_r_; \
      simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \
      simde_uint64x1_from_private(simde_vext_u64_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u64
  #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vextq_f32: NEON vextq_f32 (128-bit, four float32 lanes) — result lane i
 * is lane (i + n) of the virtual concatenation a:b; constant n in [0, 3]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_float32x4_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_float32x4_from_private(r_);
  #endif
}
/* Constant-n macro overrides: SSSE3 byte-align (with ps<->si128 casts since
 * _mm_alignr_epi8 is integer-only) or a compiler shuffle. */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vextq_f32(a, b, n) simde_float32x4_from_m128(_mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(simde_float32x4_to_m128(b)), _mm_castps_si128(simde_float32x4_to_m128(a)), n * sizeof(simde_float32))))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f32(a, b, n) (__extension__ ({ \
      simde_float32x4_private simde_vextq_f32_r_; \
      simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
      simde_float32x4_from_private(simde_vextq_f32_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_f32
  #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vextq_f64: NEON vextq_f64 (128-bit, two float64 lanes) — result lane i
 * is lane (i + n) of the virtual concatenation a:b; constant n in [0, 1].
 * (vextq_f64 exists only on AArch64.) */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    simde_float64x2_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_float64x2_from_private(r_);
  #endif
}
/* Constant-n macro overrides: SSSE3 byte-align (with pd<->si128 casts) or a
 * compiler shuffle. */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vextq_f64(a, b, n) simde_float64x2_from_m128d(_mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(simde_float64x2_to_m128d(b)), _mm_castpd_si128(simde_float64x2_to_m128d(a)), n * sizeof(simde_float64))))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f64(a, b, n) (__extension__ ({ \
      simde_float64x2_private simde_vextq_f64_r_; \
      simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
      simde_float64x2_from_private(simde_vextq_f64_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on (AArch64 alias set). */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vextq_f64
  #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vextq_s8: NEON vextq_s8 (128-bit, sixteen int8 lanes) — result lane i
 * is lane (i + n) of the virtual concatenation a:b; constant n in [0, 15]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int8x16_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
    }
    return simde_int8x16_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vextq_s8(a, b, n) simde_int8x16_from_m128i(_mm_alignr_epi8(simde_int8x16_to_m128i(b), simde_int8x16_to_m128i(a), n * sizeof(int8_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s8(a, b, n) (__extension__ ({ \
      simde_int8x16_private simde_vextq_s8_r_; \
      simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \
      simde_int8x16_from_private(simde_vextq_s8_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s8
  #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n))
#endif
|
||||
|
||||
/* simde_vextq_s16: NEON vextq_s16 (128-bit, eight int16 lanes) — result lane i
 * is lane (i + n) of the virtual concatenation a:b; constant n in [0, 7]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int16x8_t r;
    /* CONSTIFY expands a switch over n so the intrinsic gets an immediate. */
    SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* Take from a while in range, then wrap into b (mask keeps index valid). */
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_int16x8_from_private(r_);
  #endif
}
/* Constant-n macro overrides (SSSE3 byte-align or compiler shuffle). */
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
  #define simde_vextq_s16(a, b, n) simde_int16x8_from_m128i(_mm_alignr_epi8(simde_int16x8_to_m128i(b), simde_int16x8_to_m128i(a), n * sizeof(int16_t)))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s16(a, b, n) (__extension__ ({ \
      simde_int16x8_private simde_vextq_s16_r_; \
      simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
      simde_int16x8_from_private(simde_vextq_s16_r_); \
    }))
#endif
/* Expose under the native NEON name when aliasing is on. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s16
  #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int32x4_t r;
|
||||
SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
|
||||
}
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_s32(a, b, n) simde_int32x4_from_m128i(_mm_alignr_epi8(simde_int32x4_to_m128i(b), simde_int32x4_to_m128i(a), n * sizeof(int32_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_s32(a, b, n) (__extension__ ({ \
|
||||
simde_int32x4_private simde_vextq_s32_r_; \
|
||||
simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
|
||||
simde_int32x4_from_private(simde_vextq_s32_r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_s32
|
||||
#define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_int64x2_t r;
|
||||
SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_int64x2_private
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
|
||||
}
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_s64(a, b, n) simde_int64x2_from_m128i(_mm_alignr_epi8(simde_int64x2_to_m128i(b), simde_int64x2_to_m128i(a), n * sizeof(int64_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_s64(a, b, n) (__extension__ ({ \
|
||||
simde_int64x2_private simde_vextq_s64_r_; \
|
||||
simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
|
||||
simde_int64x2_from_private(simde_vextq_s64_r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_s64
|
||||
#define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint8x16_t r;
|
||||
SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
|
||||
}
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_u8(a, b, n) simde_uint8x16_from_m128i(_mm_alignr_epi8(simde_uint8x16_to_m128i(b), simde_uint8x16_to_m128i(a), n * sizeof(uint8_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_u8(a, b, n) (__extension__ ({ \
|
||||
simde_uint8x16_private simde_vextq_u8_r_; \
|
||||
simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \
|
||||
simde_uint8x16_from_private(simde_vextq_u8_r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_u8
|
||||
#define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint16x8_t r;
|
||||
SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
|
||||
}
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_u16(a, b, n) simde_uint16x8_from_m128i(_mm_alignr_epi8(simde_uint16x8_to_m128i(b), simde_uint16x8_to_m128i(a), n * sizeof(uint16_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_u16(a, b, n) (__extension__ ({ \
|
||||
simde_uint16x8_private simde_vextq_u16_r_; \
|
||||
simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
|
||||
simde_uint16x8_from_private(simde_vextq_u16_r_); \
|
||||
}))
|
||||
#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
#define simde_vextq_u16(a, b, n) (__extension__ ({ \
|
||||
simde_uint16x8_private r_; \
|
||||
r_.values = __builtin_shufflevector( \
|
||||
simde_uint16x8_to_private(a).values, \
|
||||
simde_uint16x8_to_private(b).values, \
|
||||
n + 0, n + 1, n + 2, n + 3, n + 4, n + 5, n + 6, n + 7); \
|
||||
simde_uint16x8_from_private(r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_u16
|
||||
#define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint32x4_t r;
|
||||
SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
|
||||
}
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_u32(a, b, n) simde_uint32x4_from_m128i(_mm_alignr_epi8(simde_uint32x4_to_m128i(b), simde_uint32x4_to_m128i(a), n * sizeof(uint32_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_u32(a, b, n) (__extension__ ({ \
|
||||
simde_uint32x4_private simde_vextq_u32_r_; \
|
||||
simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \
|
||||
simde_uint32x4_from_private(simde_vextq_u32_r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_u32
|
||||
#define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
simde_uint64x2_t r;
|
||||
SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
|
||||
return r;
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b),
|
||||
r_ = a_;
|
||||
const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
size_t src = i + n_;
|
||||
r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
|
||||
}
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
|
||||
#define simde_vextq_u64(a, b, n) simde_uint64x2_from_m128i(_mm_alignr_epi8(simde_uint64x2_to_m128i(b), simde_uint64x2_to_m128i(a), n * sizeof(uint64_t)))
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
|
||||
#define simde_vextq_u64(a, b, n) (__extension__ ({ \
|
||||
simde_uint64x2_private simde_vextq_u64_r_; \
|
||||
simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \
|
||||
HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \
|
||||
simde_uint64x2_from_private(simde_vextq_u64_r_); \
|
||||
}))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vextq_u64
|
||||
#define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */
|
||||
126
lib/simd_wrapper/simde/arm/neon/fma.h
Normal file
126
lib/simd_wrapper/simde/arm/neon/fma.h
Normal file
@@ -0,0 +1,126 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_FMA_H)
|
||||
#define SIMDE_ARM_NEON_FMA_H
|
||||
|
||||
#include "add.h"
|
||||
#include "mul.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
return vfma_f32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_f32(a, simde_vmul_f32(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_f32
|
||||
#define vfma_f32(a, b, c) simde_vfma_f32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
return vfma_f64(a, b, c);
|
||||
#else
|
||||
return simde_vadd_f64(a, simde_vmul_f64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_f64
|
||||
#define vfma_f64(a, b, c) simde_vfma_f64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
return vfmaq_f32(a, b, c);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_madd(b, c, a);
|
||||
#elif \
|
||||
defined(SIMDE_X86_FMA_NATIVE)
|
||||
simde_float32x4_private
|
||||
r_,
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b),
|
||||
c_ = simde_float32x4_to_private(c);
|
||||
|
||||
#if defined(SIMDE_X86_FMA_NATIVE)
|
||||
r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128);
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#else
|
||||
return simde_vaddq_f32(a, simde_vmulq_f32(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_f32
|
||||
#define vfmaq_f32(a, b, c) simde_vfmaq_f32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vfmaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
return vfmaq_f64(a, b, c);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_madd(b, c, a);
|
||||
#elif \
|
||||
defined(SIMDE_X86_FMA_NATIVE)
|
||||
simde_float64x2_private
|
||||
r_,
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b),
|
||||
c_ = simde_float64x2_to_private(c);
|
||||
|
||||
#if defined(SIMDE_X86_FMA_NATIVE)
|
||||
r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d);
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#else
|
||||
return simde_vaddq_f64(a, simde_vmulq_f64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_f64
|
||||
#define vfmaq_f64(a, b, c) simde_vfmaq_f64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */
|
||||
225
lib/simd_wrapper/simde/arm/neon/fma_lane.h
Normal file
225
lib/simd_wrapper/simde/arm/neon/fma_lane.h
Normal file
@@ -0,0 +1,225 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Atharva Nimbalkar <atharvakn@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_FMA_LANE_H)
|
||||
#define SIMDE_ARM_NEON_FMA_LANE_H
|
||||
|
||||
#include "add.h"
|
||||
#include "dup_n.h"
|
||||
#include "get_lane.h"
|
||||
#include "mul.h"
|
||||
#include "mul_lane.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* simde_vfmad_lane_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
|
||||
#define simde_vfmad_lane_f64(a, b, v, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_lane_f64(a, b, v, lane))
|
||||
#else
|
||||
#define simde_vfmad_lane_f64(a, b, v, lane) vfmad_lane_f64((a), (b), (v), (lane))
|
||||
#endif
|
||||
#else
|
||||
#define simde_vfmad_lane_f64(a, b, v, lane) \
|
||||
simde_vget_lane_f64( \
|
||||
simde_vadd_f64( \
|
||||
simde_vdup_n_f64(a), \
|
||||
simde_vdup_n_f64(simde_vmuld_lane_f64(b, v, lane)) \
|
||||
), \
|
||||
0 \
|
||||
)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmad_lane_f64
|
||||
#define vfmad_lane_f64(a, b, v, lane) simde_vfmad_lane_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmad_laneq_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
|
||||
#define simde_vfmad_laneq_f64(a, b, v, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_laneq_f64(a, b, v, lane))
|
||||
#else
|
||||
#define simde_vfmad_laneq_f64(a, b, v, lane) vfmad_laneq_f64((a), (b), (v), (lane))
|
||||
#endif
|
||||
#else
|
||||
#define simde_vfmad_laneq_f64(a, b, v, lane) \
|
||||
simde_vget_lane_f64( \
|
||||
simde_vadd_f64( \
|
||||
simde_vdup_n_f64(a), \
|
||||
simde_vdup_n_f64(simde_vmuld_laneq_f64(b, v, lane)) \
|
||||
), \
|
||||
0 \
|
||||
)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmad_laneq_f64
|
||||
#define vfmad_laneq_f64(a, b, v, lane) simde_vfmad_laneq_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmas_lane_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
|
||||
#define simde_vfmas_lane_f32(a, b, v, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_lane_f32(a, b, v, lane))
|
||||
#else
|
||||
#define simde_vfmas_lane_f32(a, b, v, lane) vfmas_lane_f32((a), (b), (v), (lane))
|
||||
#endif
|
||||
#else
|
||||
#define simde_vfmas_lane_f32(a, b, v, lane) \
|
||||
simde_vget_lane_f32( \
|
||||
simde_vadd_f32( \
|
||||
simde_vdup_n_f32(a), \
|
||||
simde_vdup_n_f32(simde_vmuls_lane_f32(b, v, lane)) \
|
||||
), \
|
||||
0 \
|
||||
)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmas_lane_f32
|
||||
#define vfmas_lane_f32(a, b, v, lane) simde_vfmas_lane_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmas_laneq_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
|
||||
#define simde_vfmas_laneq_f32(a, b, v, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_laneq_f32(a, b, v, lane))
|
||||
#else
|
||||
#define simde_vfmas_laneq_f32(a, b, v, lane) vfmas_laneq_f32((a), (b), (v), (lane))
|
||||
#endif
|
||||
#else
|
||||
#define simde_vfmas_laneq_f32(a, b, v, lane) \
|
||||
simde_vget_lane_f32( \
|
||||
simde_vadd_f32( \
|
||||
simde_vdup_n_f32(a), \
|
||||
simde_vdup_n_f32(simde_vmuls_laneq_f32(b, v, lane)) \
|
||||
), \
|
||||
0 \
|
||||
)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmas_laneq_f32
|
||||
#define vfmas_laneq_f32(a, b, v, lane) simde_vfmas_laneq_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfma_lane_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane)
|
||||
#else
|
||||
#define simde_vfma_lane_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_lane_f32(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_lane_f32
|
||||
#define vfma_lane_f32(a, b, v, lane) simde_vfma_lane_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfma_lane_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfma_lane_f64(a, b, v, lane) vfma_lane_f64((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfma_lane_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_lane_f64(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_lane_f64
|
||||
#define vfma_lane_f64(a, b, v, lane) simde_vfma_lane_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfma_laneq_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfma_laneq_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_laneq_f32(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_laneq_f32
|
||||
#define vfma_laneq_f32(a, b, v, lane) simde_vfma_laneq_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfma_laneq_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfma_laneq_f64(a, b, v, lane) vfma_laneq_f64((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfma_laneq_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_laneq_f64(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_laneq_f64
|
||||
#define vfma_laneq_f64(a, b, v, lane) simde_vfma_laneq_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmaq_lane_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfmaq_lane_f64(a, b, v, lane) vfmaq_lane_f64((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfmaq_lane_f64(a, b, v, lane) simde_vaddq_f64(a, simde_vmulq_lane_f64(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_lane_f64
|
||||
#define vfmaq_lane_f64(a, b, v, lane) simde_vfmaq_lane_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmaq_lane_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfmaq_lane_f32(a, b, v, lane) simde_vaddq_f32(a, simde_vmulq_lane_f32(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_lane_f32
|
||||
#define vfmaq_lane_f32(a, b, v, lane) simde_vfmaq_lane_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmaq_laneq_f32 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfmaq_laneq_f32(a, b, v, lane) \
|
||||
simde_vaddq_f32(a, simde_vmulq_laneq_f32(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_laneq_f32
|
||||
#define vfmaq_laneq_f32(a, b, v, lane) simde_vfmaq_laneq_f32(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
/* simde_vfmaq_laneq_f64 */
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
|
||||
#define simde_vfmaq_laneq_f64(a, b, v, lane) vfmaq_laneq_f64((a), (b), (v), (lane))
|
||||
#else
|
||||
#define simde_vfmaq_laneq_f64(a, b, v, lane) \
|
||||
simde_vaddq_f64(a, simde_vmulq_laneq_f64(b, v, lane))
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_laneq_f64
|
||||
#define vfmaq_laneq_f64(a, b, v, lane) simde_vfmaq_laneq_f64(a, b, v, lane)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_FMA_LANE_H) */
|
||||
97
lib/simd_wrapper/simde/arm/neon/fma_n.h
Normal file
97
lib/simd_wrapper/simde/arm/neon/fma_n.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_FMA_N_H)
|
||||
#define SIMDE_ARM_NEON_FMA_N_H
|
||||
|
||||
#include "types.h"
|
||||
#include "dup_n.h"
|
||||
#include "fma.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
|
||||
return vfma_n_f32(a, b, c);
|
||||
#else
|
||||
return simde_vfma_f32(a, b, simde_vdup_n_f32(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_n_f32
|
||||
#define vfma_n_f32(a, b, c) simde_vfma_n_f32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
|
||||
return vfma_n_f64(a, b, c);
|
||||
#else
|
||||
return simde_vfma_f64(a, b, simde_vdup_n_f64(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfma_n_f64
|
||||
#define vfma_n_f64(a, b, c) simde_vfma_n_f64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
|
||||
return vfmaq_n_f32(a, b, c);
|
||||
#else
|
||||
return simde_vfmaq_f32(a, b, simde_vdupq_n_f32(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_n_f32
|
||||
#define vfmaq_n_f32(a, b, c) simde_vfmaq_n_f32(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vfmaq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
|
||||
return vfmaq_n_f64(a, b, c);
|
||||
#else
|
||||
return simde_vfmaq_f64(a, b, simde_vdupq_n_f64(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vfmaq_n_f64
|
||||
#define vfmaq_n_f64(a, b, c) simde_vfmaq_n_f64(a, b, c)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */
|
||||
300
lib/simd_wrapper/simde/arm/neon/get_high.h
Normal file
300
lib/simd_wrapper/simde/arm/neon/get_high.h
Normal file
@@ -0,0 +1,300 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_GET_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_GET_HIGH_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vget_high_f32(simde_float32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_f32(a);
|
||||
#else
|
||||
simde_float32x2_private r_;
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_f32
|
||||
#define vget_high_f32(a) simde_vget_high_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vget_high_f64(simde_float64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vget_high_f64(a);
|
||||
#else
|
||||
simde_float64x1_private r_;
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_f64
|
||||
#define vget_high_f64(a) simde_vget_high_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vget_high_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_s8(a);
|
||||
#else
|
||||
simde_int8x8_private r_;
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_s8
|
||||
#define vget_high_s8(a) simde_vget_high_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vget_high_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_s16(a);
|
||||
#else
|
||||
simde_int16x4_private r_;
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_s16
|
||||
#define vget_high_s16(a) simde_vget_high_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vget_high_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_s32(a);
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_s32
|
||||
#define vget_high_s32(a) simde_vget_high_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vget_high_s64(simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_s64(a);
|
||||
#else
|
||||
simde_int64x1_private r_;
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_s64
|
||||
#define vget_high_s64(a) simde_vget_high_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vget_high_u8(simde_uint8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14,15);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_u8
|
||||
#define vget_high_u8(a) simde_vget_high_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vget_high_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_u16(a);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_u16
|
||||
#define vget_high_u16(a) simde_vget_high_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vget_high_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_u32(a);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_u32
|
||||
#define vget_high_u32(a) simde_vget_high_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vget_high_u64(simde_uint64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_high_u64(a);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_high_u64
|
||||
#define vget_high_u64(a) simde_vget_high_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_GET_HIGH_H) */
|
||||
519
lib/simd_wrapper/simde/arm/neon/get_lane.h
Normal file
519
lib/simd_wrapper/simde/arm/neon/get_lane.h
Normal file
@@ -0,0 +1,519 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_GET_LANE_H)
|
||||
#define SIMDE_ARM_NEON_GET_LANE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vget_lane_f32(simde_float32x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v);
|
||||
#else
|
||||
simde_float32x2_private v_ = simde_float32x2_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_f32
|
||||
#define vget_lane_f32(v, lane) simde_vget_lane_f32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vget_lane_f64(simde_float64x1_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_float64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
(void) lane;
|
||||
return vget_lane_f64(v, 0);
|
||||
#else
|
||||
simde_float64x1_private v_ = simde_float64x1_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_f64
|
||||
#define vget_lane_f64(v, lane) simde_vget_lane_f64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vget_lane_s8(simde_int8x8_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_8_(vget_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v);
|
||||
#else
|
||||
simde_int8x8_private v_ = simde_int8x8_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_s8
|
||||
#define vget_lane_s8(v, lane) simde_vget_lane_s8((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vget_lane_s16(simde_int16x4_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(vget_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v);
|
||||
#else
|
||||
simde_int16x4_private v_ = simde_int16x4_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_s16
|
||||
#define vget_lane_s16(v, lane) simde_vget_lane_s16((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vget_lane_s32(simde_int32x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vget_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v);
|
||||
#else
|
||||
simde_int32x2_private v_ = simde_int32x2_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_s32
|
||||
#define vget_lane_s32(v, lane) simde_vget_lane_s32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vget_lane_s64(simde_int64x1_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
int64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
(void) lane;
|
||||
return vget_lane_s64(v, 0);
|
||||
#else
|
||||
simde_int64x1_private v_ = simde_int64x1_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_s64
|
||||
#define vget_lane_s64(v, lane) simde_vget_lane_s64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vget_lane_u8(simde_uint8x8_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_8_(vget_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint8x8_private v_ = simde_uint8x8_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_u8
|
||||
#define vget_lane_u8(v, lane) simde_vget_lane_u8((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vget_lane_u16(simde_uint16x4_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(vget_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint16x4_private v_ = simde_uint16x4_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_u16
|
||||
#define vget_lane_u16(v, lane) simde_vget_lane_u16((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vget_lane_u32(simde_uint32x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vget_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint32x2_private v_ = simde_uint32x2_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_u32
|
||||
#define vget_lane_u32(v, lane) simde_vget_lane_u32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vget_lane_u64(simde_uint64x1_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
uint64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
(void) lane;
|
||||
return vget_lane_u64(v, 0);
|
||||
#else
|
||||
simde_uint64x1_private v_ = simde_uint64x1_to_private(v);
|
||||
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_lane_u64
|
||||
#define vget_lane_u64(v, lane) simde_vget_lane_u64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vgetq_lane_f32(simde_float32x4_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(vgetq_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v);
|
||||
#else
|
||||
simde_float32x4_private v_ = simde_float32x4_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(wasm_f32x4_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v_.v128);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_f32
|
||||
#define vgetq_lane_f32(v, lane) simde_vgetq_lane_f32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vgetq_lane_f64(simde_float64x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v);
|
||||
#else
|
||||
simde_float64x2_private v_ = simde_float64x2_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(wasm_f64x2_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v_.v128);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_f64
|
||||
#define vgetq_lane_f64(v, lane) simde_vgetq_lane_f64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vgetq_lane_s8(simde_int8x16_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_16_(vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v);
|
||||
#else
|
||||
simde_int8x16_private v_ = simde_int8x16_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int r_;
|
||||
SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(int8_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_s8
|
||||
#define vgetq_lane_s8(v, lane) simde_vgetq_lane_s8((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vgetq_lane_s16(simde_int16x8_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_8_(vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v);
|
||||
#else
|
||||
simde_int16x8_private v_ = simde_int16x8_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int r_;
|
||||
SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(int16_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_s16
|
||||
#define vgetq_lane_s16(v, lane) simde_vgetq_lane_s16((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vgetq_lane_s32(simde_int32x4_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v);
|
||||
#else
|
||||
simde_int32x4_private v_ = simde_int32x4_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int r_;
|
||||
SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(int32_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_s32
|
||||
#define vgetq_lane_s32(v, lane) simde_vgetq_lane_s32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vgetq_lane_s64(simde_int64x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
int64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v);
|
||||
#else
|
||||
simde_int64x2_private v_ = simde_int64x2_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int64_t r_;
|
||||
SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(int64_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_s64
|
||||
#define vgetq_lane_s64(v, lane) simde_vgetq_lane_s64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vgetq_lane_u8(simde_uint8x16_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_16_(vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint8x16_private v_ = simde_uint8x16_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int r_;
|
||||
SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(uint8_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_u8
|
||||
#define vgetq_lane_u8(v, lane) simde_vgetq_lane_u8((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vgetq_lane_u16(simde_uint16x8_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_8_(vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint16x8_private v_ = simde_uint16x8_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int r_;
|
||||
SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(uint16_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_u16
|
||||
#define vgetq_lane_u16(v, lane) simde_vgetq_lane_u16((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vgetq_lane_u32(simde_uint32x4_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_4_(vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint32x4_private v_ = simde_uint32x4_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int32_t r_;
|
||||
SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(uint32_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_u32
|
||||
#define vgetq_lane_u32(v, lane) simde_vgetq_lane_u32((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint64_t
|
||||
simde_vgetq_lane_u64(simde_uint64x2_t v, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
uint64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
SIMDE_CONSTIFY_2_(vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v);
|
||||
#else
|
||||
simde_uint64x2_private v_ = simde_uint64x2_to_private(v);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
int64_t r_;
|
||||
SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v_.v128);
|
||||
r = HEDLEY_STATIC_CAST(uint64_t, r_);
|
||||
#else
|
||||
r = v_.values[lane];
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vgetq_lane_u64
|
||||
#define vgetq_lane_u64(v, lane) simde_vgetq_lane_u64((v), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_GET_LANE_H) */
|
||||
332
lib/simd_wrapper/simde/arm/neon/get_low.h
Normal file
332
lib/simd_wrapper/simde/arm/neon/get_low.h
Normal file
@@ -0,0 +1,332 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_GET_LOW_H)
|
||||
#define SIMDE_ARM_NEON_GET_LOW_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vget_low_f32(simde_float32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_f32(a);
|
||||
#else
|
||||
simde_float32x2_private r_;
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_f32
|
||||
#define vget_low_f32(a) simde_vget_low_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vget_low_f64(simde_float64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vget_low_f64(a);
|
||||
#else
|
||||
simde_float64x1_private r_;
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_f64
|
||||
#define vget_low_f64(a) simde_vget_low_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vget_low_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_s8(a);
|
||||
#else
|
||||
simde_int8x8_private r_;
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_s8
|
||||
#define vget_low_s8(a) simde_vget_low_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vget_low_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_s16(a);
|
||||
#else
|
||||
simde_int16x4_private r_;
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_s16
|
||||
#define vget_low_s16(a) simde_vget_low_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vget_low_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_s32(a);
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_s32
|
||||
#define vget_low_s32(a) simde_vget_low_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vget_low_s64(simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_s64(a);
|
||||
#else
|
||||
simde_int64x1_private r_;
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_s64
|
||||
#define vget_low_s64(a) simde_vget_low_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vget_low_u8(simde_uint8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_u8
|
||||
#define vget_low_u8(a) simde_vget_low_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vget_low_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_u16(a);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_u16
|
||||
#define vget_low_u16(a) simde_vget_low_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vget_low_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_u32(a);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_u32
|
||||
#define vget_low_u32(a) simde_vget_low_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vget_low_u64(simde_uint64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vget_low_u64(a);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_movepi64_pi64(a_.m128i);
|
||||
#else
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
|
||||
r_.values = __builtin_shufflevector(a_.values, a_.values, 0);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vget_low_u64
|
||||
#define vget_low_u64(a) simde_vget_low_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_GET_LOW_H) */
|
||||
310
lib/simd_wrapper/simde/arm/neon/hadd.h
Normal file
310
lib/simd_wrapper/simde/arm/neon/hadd.h
Normal file
@@ -0,0 +1,310 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
/* TODO: the 128-bit versions only require AVX-512 because of the final
|
||||
* conversions from larger types down to smaller ones. We could get
|
||||
* the same results from AVX/AVX2 instructions with some shuffling
|
||||
* to extract the low half of each input element to the low half
|
||||
* of a 256-bit vector, then cast that to a 128-bit vector. */
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_HADD_H)
|
||||
#define SIMDE_ARM_NEON_HADD_H
|
||||
|
||||
#include "addl.h"
|
||||
#include "shr_n.h"
|
||||
#include "movn.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vhadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_s8(a, b);
|
||||
#else
|
||||
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddl_s8(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_s8
|
||||
#define vhadd_s8(a, b) simde_vhadd_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vhadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_s16(a, b);
|
||||
#else
|
||||
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddl_s16(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_s16
|
||||
#define vhadd_s16(a, b) simde_vhadd_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vhadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_s32(a, b);
|
||||
#else
|
||||
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddl_s32(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_s32
|
||||
#define vhadd_s32(a, b) simde_vhadd_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_u8(a, b);
|
||||
#else
|
||||
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddl_u8(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_u8
|
||||
#define vhadd_u8(a, b) simde_vhadd_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_u16(a, b);
|
||||
#else
|
||||
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddl_u16(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_u16
|
||||
#define vhadd_u16(a, b) simde_vhadd_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhadd_u32(a, b);
|
||||
#else
|
||||
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddl_u32(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhadd_u32
|
||||
#define vhadd_u32(a, b) simde_vhadd_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_s8(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_add_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) + HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_s8
|
||||
#define vhaddq_s8(a, b) simde_vhaddq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_s16(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_add_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) + HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_s16
|
||||
#define vhaddq_s16(a, b) simde_vhaddq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_s32(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_add_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) + HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_s32
|
||||
#define vhaddq_s32(a, b) simde_vhaddq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_u8(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_add_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
v128_t lo =
|
||||
wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_low_u8x16(a_.v128),
|
||||
wasm_u16x8_extend_low_u8x16(b_.v128)),
|
||||
1);
|
||||
v128_t hi =
|
||||
wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_high_u8x16(a_.v128),
|
||||
wasm_u16x8_extend_high_u8x16(b_.v128)),
|
||||
1);
|
||||
r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
|
||||
22, 24, 26, 28, 30);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_u8
|
||||
#define vhaddq_u8(a, b) simde_vhaddq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_u16(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_add_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_u16
|
||||
#define vhaddq_u16(a, b) simde_vhaddq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhaddq_u32(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_add_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhaddq_u32
|
||||
#define vhaddq_u32(a, b) simde_vhaddq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_HADD_H) */
|
||||
310
lib/simd_wrapper/simde/arm/neon/hsub.h
Normal file
310
lib/simd_wrapper/simde/arm/neon/hsub.h
Normal file
@@ -0,0 +1,310 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
/* TODO: the 128-bit versions only require AVX-512 because of the final
|
||||
* conversions from larger types down to smaller ones. We could get
|
||||
* the same results from AVX/AVX2 instructions with some shuffling
|
||||
* to extract the low half of each input element to the low half
|
||||
* of a 256-bit vector, then cast that to a 128-bit vector. */
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_HSUB_H)
|
||||
#define SIMDE_ARM_NEON_HSUB_H
|
||||
|
||||
#include "subl.h"
|
||||
#include "shr_n.h"
|
||||
#include "movn.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vhsub_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_s8(a, b);
|
||||
#else
|
||||
return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubl_s8(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_s8
|
||||
#define vhsub_s8(a, b) simde_vhsub_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vhsub_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_s16(a, b);
|
||||
#else
|
||||
return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubl_s16(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_s16
|
||||
#define vhsub_s16(a, b) simde_vhsub_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vhsub_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_s32(a, b);
|
||||
#else
|
||||
return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubl_s32(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_s32
|
||||
#define vhsub_s32(a, b) simde_vhsub_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vhsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_u8(a, b);
|
||||
#else
|
||||
return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubl_u8(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_u8
|
||||
#define vhsub_u8(a, b) simde_vhsub_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vhsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_u16(a, b);
|
||||
#else
|
||||
return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubl_u16(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_u16
|
||||
#define vhsub_u16(a, b) simde_vhsub_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vhsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsub_u32(a, b);
|
||||
#else
|
||||
return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubl_u32(a, b), 1));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsub_u32
|
||||
#define vhsub_u32(a, b) simde_vhsub_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vhsubq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_s8(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_sub_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_s8
|
||||
#define vhsubq_s8(a, b) simde_vhsubq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vhsubq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_s16(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_sub_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_s16
|
||||
#define vhsubq_s16(a, b) simde_vhsubq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vhsubq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_s32(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_sub_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_s32
|
||||
#define vhsubq_s32(a, b) simde_vhsubq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vhsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_u8(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_sub_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
v128_t lo =
|
||||
wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_low_u8x16(a_.v128),
|
||||
wasm_u16x8_extend_low_u8x16(b_.v128)),
|
||||
1);
|
||||
v128_t hi =
|
||||
wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_high_u8x16(a_.v128),
|
||||
wasm_u16x8_extend_high_u8x16(b_.v128)),
|
||||
1);
|
||||
r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
|
||||
22, 24, 26, 28, 30);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_u8
|
||||
#define vhsubq_u8(a, b) simde_vhsubq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vhsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_u16(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_sub_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_u16
|
||||
#define vhsubq_u16(a, b) simde_vhsubq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vhsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vhsubq_u32(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_sub_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vhsubq_u32
|
||||
#define vhsubq_u32(a, b) simde_vhsubq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_HSUB_H) */
|
||||
512
lib/simd_wrapper/simde/arm/neon/ld1.h
Normal file
512
lib/simd_wrapper/simde/arm/neon/ld1.h
Normal file
@@ -0,0 +1,512 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD1_H)
|
||||
#define SIMDE_ARM_NEON_LD1_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x4_t
|
||||
simde_vld1_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vld1_f16(ptr);
|
||||
#else
|
||||
simde_float16x4_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_float16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_f16
|
||||
#define vld1_f16(a) simde_vld1_f16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vld1_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_f32(ptr);
|
||||
#else
|
||||
simde_float32x2_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_f32
|
||||
#define vld1_f32(a) simde_vld1_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vld1_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(1)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld1_f64(ptr);
|
||||
#else
|
||||
simde_float64x1_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_f64
|
||||
#define vld1_f64(a) simde_vld1_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vld1_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_s8(ptr);
|
||||
#else
|
||||
simde_int8x8_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_s8
|
||||
#define vld1_s8(a) simde_vld1_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vld1_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_s16(ptr);
|
||||
#else
|
||||
simde_int16x4_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_s16
|
||||
#define vld1_s16(a) simde_vld1_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vld1_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_s32(ptr);
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_s32
|
||||
#define vld1_s32(a) simde_vld1_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vld1_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_s64(ptr);
|
||||
#else
|
||||
simde_int64x1_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_s64
|
||||
#define vld1_s64(a) simde_vld1_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vld1_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_u8(ptr);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_u8
|
||||
#define vld1_u8(a) simde_vld1_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vld1_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_u16(ptr);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_u16
|
||||
#define vld1_u16(a) simde_vld1_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vld1_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_u32(ptr);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_u32
|
||||
#define vld1_u32(a) simde_vld1_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vld1_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_u64(ptr);
|
||||
#else
|
||||
simde_uint64x1_private r_;
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_u64
|
||||
#define vld1_u64(a) simde_vld1_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float16x8_t
|
||||
simde_vld1q_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
|
||||
return vld1q_f16(ptr);
|
||||
#else
|
||||
simde_float16x8_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_float16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_f16
|
||||
#define vld1q_f16(a) simde_vld1q_f16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vld1q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_f32(ptr);
|
||||
#else
|
||||
simde_float32x4_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_f32
|
||||
#define vld1q_f32(a) simde_vld1q_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vld1q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld1q_f64(ptr);
|
||||
#else
|
||||
simde_float64x2_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_f64
|
||||
#define vld1q_f64(a) simde_vld1q_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vld1q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_s8(ptr);
|
||||
#else
|
||||
simde_int8x16_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_s8
|
||||
#define vld1q_s8(a) simde_vld1q_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vld1q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_s16(ptr);
|
||||
#else
|
||||
simde_int16x8_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_s16
|
||||
#define vld1q_s16(a) simde_vld1q_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vld1q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_s32(ptr);
|
||||
#else
|
||||
simde_int32x4_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_s32
|
||||
#define vld1q_s32(a) simde_vld1q_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vld1q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_s64(ptr);
|
||||
#else
|
||||
simde_int64x2_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_s64
|
||||
#define vld1q_s64(a) simde_vld1q_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vld1q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_u8(ptr);
|
||||
#else
|
||||
simde_uint8x16_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u8
|
||||
#define vld1q_u8(a) simde_vld1q_u8((a))
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_BUG_INTEL_857088)
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x2_t
|
||||
simde_vld1q_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if \
|
||||
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
|
||||
return vld1q_u8_x2(ptr);
|
||||
#else
|
||||
simde_uint8x16_private a_[2];
|
||||
for (size_t i = 0; i < 32; i++) {
|
||||
a_[i / 16].values[i % 16] = ptr[i];
|
||||
}
|
||||
simde_uint8x16x2_t s_ = { { simde_uint8x16_from_private(a_[0]),
|
||||
simde_uint8x16_from_private(a_[1]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u8_x2
|
||||
#define vld1q_u8_x2(a) simde_vld1q_u8_x2((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x3_t
|
||||
simde_vld1q_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) {
|
||||
#if \
|
||||
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
|
||||
return vld1q_u8_x3(ptr);
|
||||
#else
|
||||
simde_uint8x16_private a_[3];
|
||||
for (size_t i = 0; i < 48; i++) {
|
||||
a_[i / 16].values[i % 16] = ptr[i];
|
||||
}
|
||||
simde_uint8x16x3_t s_ = { { simde_uint8x16_from_private(a_[0]),
|
||||
simde_uint8x16_from_private(a_[1]),
|
||||
simde_uint8x16_from_private(a_[2]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u8_x3
|
||||
#define vld1q_u8_x3(a) simde_vld1q_u8_x3((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x4_t
|
||||
simde_vld1q_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
|
||||
#if \
|
||||
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
|
||||
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
|
||||
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
|
||||
return vld1q_u8_x4(ptr);
|
||||
#else
|
||||
simde_uint8x16_private a_[4];
|
||||
for (size_t i = 0; i < 64; i++) {
|
||||
a_[i / 16].values[i % 16] = ptr[i];
|
||||
}
|
||||
simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]),
|
||||
simde_uint8x16_from_private(a_[1]),
|
||||
simde_uint8x16_from_private(a_[2]),
|
||||
simde_uint8x16_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u8_x4
|
||||
#define vld1q_u8_x4(a) simde_vld1q_u8_x4((a))
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vld1q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_u16(ptr);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u16
|
||||
#define vld1q_u16(a) simde_vld1q_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vld1q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_u32(ptr);
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u32
|
||||
#define vld1q_u32(a) simde_vld1q_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vld1q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_u64(ptr);
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_load(ptr);
|
||||
#else
|
||||
simde_memcpy(&r_, ptr, sizeof(r_));
|
||||
#endif
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_u64
|
||||
#define vld1q_u64(a) simde_vld1q_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD1_H) */
|
||||
407
lib/simd_wrapper/simde/arm/neon/ld1_dup.h
Normal file
407
lib/simd_wrapper/simde/arm/neon/ld1_dup.h
Normal file
@@ -0,0 +1,407 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD1_DUP_H)
|
||||
#define SIMDE_ARM_NEON_LD1_DUP_H
|
||||
|
||||
#include "dup_n.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vld1_dup_f32(simde_float32 const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_f32(ptr);
|
||||
#else
|
||||
return simde_vdup_n_f32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_f32
|
||||
#define vld1_dup_f32(a) simde_vld1_dup_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vld1_dup_f64(simde_float64 const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld1_dup_f64(ptr);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return simde_vreinterpret_f64_s64(vld1_dup_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, ptr)));
|
||||
#else
|
||||
return simde_vdup_n_f64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_f64
|
||||
#define vld1_dup_f64(a) simde_vld1_dup_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vld1_dup_s8(int8_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_s8(ptr);
|
||||
#else
|
||||
return simde_vdup_n_s8(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_s8
|
||||
#define vld1_dup_s8(a) simde_vld1_dup_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vld1_dup_s16(int16_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_s16(ptr);
|
||||
#else
|
||||
return simde_vdup_n_s16(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_s16
|
||||
#define vld1_dup_s16(a) simde_vld1_dup_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vld1_dup_s32(int32_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_s32(ptr);
|
||||
#else
|
||||
return simde_vdup_n_s32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_s32
|
||||
#define vld1_dup_s32(a) simde_vld1_dup_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vld1_dup_s64(int64_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_s64(ptr);
|
||||
#else
|
||||
return simde_vdup_n_s64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_s64
|
||||
#define vld1_dup_s64(a) simde_vld1_dup_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vld1_dup_u8(uint8_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_u8(ptr);
|
||||
#else
|
||||
return simde_vdup_n_u8(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_u8
|
||||
#define vld1_dup_u8(a) simde_vld1_dup_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vld1_dup_u16(uint16_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_u16(ptr);
|
||||
#else
|
||||
return simde_vdup_n_u16(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_u16
|
||||
#define vld1_dup_u16(a) simde_vld1_dup_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vld1_dup_u32(uint32_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_u32(ptr);
|
||||
#else
|
||||
return simde_vdup_n_u32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_u32
|
||||
#define vld1_dup_u32(a) simde_vld1_dup_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vld1_dup_u64(uint64_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1_dup_u64(ptr);
|
||||
#else
|
||||
return simde_vdup_n_u64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_dup_u64
|
||||
#define vld1_dup_u64(a) simde_vld1_dup_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vld1q_dup_f32(simde_float32 const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_f32(ptr);
|
||||
#elif \
|
||||
defined(SIMDE_X86_SSE_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_float32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE_NATIVE)
|
||||
r_.m128 = _mm_load_ps1(ptr);
|
||||
#else
|
||||
r_.v128 = wasm_v128_load32_splat(ptr);
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_f32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_f32
|
||||
#define vld1q_dup_f32(a) simde_vld1q_dup_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vld1q_dup_f64(simde_float64 const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld1q_dup_f64(ptr);
|
||||
#else
|
||||
return simde_vdupq_n_f64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_f64
|
||||
#define vld1q_dup_f64(a) simde_vld1q_dup_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vld1q_dup_s8(int8_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_s8(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_int8x16_private r_;
|
||||
|
||||
r_.v128 = wasm_v128_load8_splat(ptr);
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_s8(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_s8
|
||||
#define vld1q_dup_s8(a) simde_vld1q_dup_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vld1q_dup_s16(int16_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_s16(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_int16x8_private r_;
|
||||
|
||||
r_.v128 = wasm_v128_load16_splat(ptr);
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_s16(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_s16
|
||||
#define vld1q_dup_s16(a) simde_vld1q_dup_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vld1q_dup_s32(int32_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_s32(ptr);
|
||||
#elif \
|
||||
defined(SIMDE_X86_SSE2_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_int32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr)));
|
||||
#else
|
||||
r_.v128 = wasm_v128_load32_splat(ptr);
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_s32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_s32
|
||||
#define vld1q_dup_s32(a) simde_vld1q_dup_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vld1q_dup_s64(int64_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_s64(ptr);
|
||||
#elif \
|
||||
defined(SIMDE_X86_SSE2_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_int64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi64x(*ptr);
|
||||
#else
|
||||
r_.v128 = wasm_v128_load64_splat(ptr);
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_s64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_s64
|
||||
#define vld1q_dup_s64(a) simde_vld1q_dup_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vld1q_dup_u8(uint8_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_u8(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint8x16_private r_;
|
||||
|
||||
r_.v128 = wasm_v128_load8_splat(ptr);
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_u8(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_u8
|
||||
#define vld1q_dup_u8(a) simde_vld1q_dup_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vld1q_dup_u16(uint16_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_u16(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint16x8_private r_;
|
||||
|
||||
r_.v128 = wasm_v128_load16_splat(ptr);
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_u16(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_u16
|
||||
#define vld1q_dup_u16(a) simde_vld1q_dup_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vld1q_dup_u32(uint32_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_u32(ptr);
|
||||
#elif \
|
||||
defined(SIMDE_X86_SSE2_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint32x4_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr)));
|
||||
#else
|
||||
r_.v128 = wasm_v128_load32_splat(ptr);
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_u32(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_u32
|
||||
#define vld1q_dup_u32(a) simde_vld1q_dup_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vld1q_dup_u64(uint64_t const * ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld1q_dup_u64(ptr);
|
||||
#elif \
|
||||
defined(SIMDE_X86_SSE2_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint64x2_private r_;
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_set1_epi64x(*HEDLEY_REINTERPRET_CAST(int64_t const *, ptr));
|
||||
#else
|
||||
r_.v128 = wasm_v128_load64_splat(ptr);
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#else
|
||||
return simde_vdupq_n_u64(*ptr);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_dup_u64
|
||||
#define vld1q_dup_u64(a) simde_vld1q_dup_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD1_DUP_H) */
|
||||
359
lib/simd_wrapper/simde/arm/neon/ld1_lane.h
Normal file
359
lib/simd_wrapper/simde/arm/neon/ld1_lane.h
Normal file
@@ -0,0 +1,359 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD1_LANE_H)
|
||||
#define SIMDE_ARM_NEON_LD1_LANE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t simde_vld1_lane_s8(int8_t const *ptr, simde_int8x8_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_int8x8_private r = simde_int8x8_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int8x8_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_s8(ptr, src, lane) vld1_lane_s8(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_s8
|
||||
#define vld1_lane_s8(ptr, src, lane) simde_vld1_lane_s8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t simde_vld1_lane_s16(int16_t const *ptr, simde_int16x4_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int16x4_private r = simde_int16x4_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int16x4_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_s16(ptr, src, lane) vld1_lane_s16(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_s16
|
||||
#define vld1_lane_s16(ptr, src, lane) simde_vld1_lane_s16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t simde_vld1_lane_s32(int32_t const *ptr, simde_int32x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int32x2_private r = simde_int32x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int32x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_s32(ptr, src, lane) vld1_lane_s32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_s32
|
||||
#define vld1_lane_s32(ptr, src, lane) simde_vld1_lane_s32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t simde_vld1_lane_s64(int64_t const *ptr, simde_int64x1_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_int64x1_private r = simde_int64x1_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int64x1_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_s64(ptr, src, lane) vld1_lane_s64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_s64
|
||||
#define vld1_lane_s64(ptr, src, lane) simde_vld1_lane_s64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t simde_vld1_lane_u8(uint8_t const *ptr, simde_uint8x8_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_uint8x8_private r = simde_uint8x8_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint8x8_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_u8(ptr, src, lane) vld1_lane_u8(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_u8
|
||||
#define vld1_lane_u8(ptr, src, lane) simde_vld1_lane_u8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t simde_vld1_lane_u16(uint16_t const *ptr, simde_uint16x4_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint16x4_private r = simde_uint16x4_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint16x4_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_u16(ptr, src, lane) vld1_lane_u16(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_u16
|
||||
#define vld1_lane_u16(ptr, src, lane) simde_vld1_lane_u16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t simde_vld1_lane_u32(uint32_t const *ptr, simde_uint32x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint32x2_private r = simde_uint32x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint32x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_u32(ptr, src, lane) vld1_lane_u32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_u32
|
||||
#define vld1_lane_u32(ptr, src, lane) simde_vld1_lane_u32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t simde_vld1_lane_u64(uint64_t const *ptr, simde_uint64x1_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_uint64x1_private r = simde_uint64x1_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint64x1_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_u64(ptr, src, lane) vld1_lane_u64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_u64
|
||||
#define vld1_lane_u64(ptr, src, lane) simde_vld1_lane_u64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t simde_vld1_lane_f32(simde_float32_t const *ptr, simde_float32x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float32x2_private r = simde_float32x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_float32x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1_lane_f32(ptr, src, lane) vld1_lane_f32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_f32
|
||||
#define vld1_lane_f32(ptr, src, lane) simde_vld1_lane_f32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t simde_vld1_lane_f64(simde_float64_t const *ptr, simde_float64x1_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_float64x1_private r = simde_float64x1_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_float64x1_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#define simde_vld1_lane_f64(ptr, src, lane) vld1_lane_f64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1_lane_f64
|
||||
#define vld1_lane_f64(ptr, src, lane) simde_vld1_lane_f64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t simde_vld1q_lane_s8(int8_t const *ptr, simde_int8x16_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
simde_int8x16_private r = simde_int8x16_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int8x16_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_s8(ptr, src, lane) vld1q_lane_s8(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_s8
|
||||
#define vld1q_lane_s8(ptr, src, lane) simde_vld1q_lane_s8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t simde_vld1q_lane_s16(int16_t const *ptr, simde_int16x8_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_int16x8_private r = simde_int16x8_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int16x8_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_s16(ptr, src, lane) vld1q_lane_s16(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_s16
|
||||
#define vld1q_lane_s16(ptr, src, lane) simde_vld1q_lane_s16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t simde_vld1q_lane_s32(int32_t const *ptr, simde_int32x4_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int32x4_private r = simde_int32x4_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int32x4_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_s32(ptr, src, lane) vld1q_lane_s32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_s32
|
||||
#define vld1q_lane_s32(ptr, src, lane) simde_vld1q_lane_s32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t simde_vld1q_lane_s64(int64_t const *ptr, simde_int64x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int64x2_private r = simde_int64x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_int64x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_s64(ptr, src, lane) vld1q_lane_s64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_s64
|
||||
#define vld1q_lane_s64(ptr, src, lane) simde_vld1q_lane_s64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t simde_vld1q_lane_u8(uint8_t const *ptr, simde_uint8x16_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
simde_uint8x16_private r = simde_uint8x16_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint8x16_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_u8(ptr, src, lane) vld1q_lane_u8(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_u8
|
||||
#define vld1q_lane_u8(ptr, src, lane) simde_vld1q_lane_u8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t simde_vld1q_lane_u16(uint16_t const *ptr, simde_uint16x8_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_uint16x8_private r = simde_uint16x8_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint16x8_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_u16(ptr, src, lane) vld1q_lane_u16(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_u16
|
||||
#define vld1q_lane_u16(ptr, src, lane) simde_vld1q_lane_u16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t simde_vld1q_lane_u32(uint32_t const *ptr, simde_uint32x4_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint32x4_private r = simde_uint32x4_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint32x4_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_u32(ptr, src, lane) vld1q_lane_u32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_u32
|
||||
#define vld1q_lane_u32(ptr, src, lane) simde_vld1q_lane_u32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t simde_vld1q_lane_u64(uint64_t const *ptr, simde_uint64x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint64x2_private r = simde_uint64x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_uint64x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_u64(ptr, src, lane) vld1q_lane_u64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_u64
|
||||
#define vld1q_lane_u64(ptr, src, lane) simde_vld1q_lane_u64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t simde_vld1q_lane_f32(simde_float32_t const *ptr, simde_float32x4_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_float32x4_private r = simde_float32x4_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_float32x4_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define simde_vld1q_lane_f32(ptr, src, lane) vld1q_lane_f32(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_f32
|
||||
#define vld1q_lane_f32(ptr, src, lane) simde_vld1q_lane_f32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t simde_vld1q_lane_f64(simde_float64_t const *ptr, simde_float64x2_t src,
|
||||
const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float64x2_private r = simde_float64x2_to_private(src);
|
||||
r.values[lane] = *ptr;
|
||||
return simde_float64x2_from_private(r);
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#define simde_vld1q_lane_f64(ptr, src, lane) vld1q_lane_f64(ptr, src, lane)
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld1q_lane_f64
|
||||
#define vld1q_lane_f64(ptr, src, lane) simde_vld1q_lane_f64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD1_LANE_H) */
|
||||
713
lib/simd_wrapper/simde/arm/neon/ld2.h
Normal file
713
lib/simd_wrapper/simde/arm/neon/ld2.h
Normal file
@@ -0,0 +1,713 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD2_H)
|
||||
#define SIMDE_ARM_NEON_LD2_H
|
||||
|
||||
#include "get_low.h"
|
||||
#include "get_high.h"
|
||||
#include "ld1.h"
|
||||
#include "uzp.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
|
||||
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
|
||||
#endif
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
#if !defined(SIMDE_BUG_INTEL_857088)
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8x2_t
|
||||
simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_s8(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
v128_t a = wasm_v128_load(ptr);
|
||||
simde_int8x16_private q_;
|
||||
q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
simde_int8x16_t q = simde_int8x16_from_private(q_);
|
||||
|
||||
simde_int8x8x2_t u = {
|
||||
simde_vget_low_s8(q),
|
||||
simde_vget_high_s8(q)
|
||||
};
|
||||
return u;
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(simde_vld1q_s8(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
simde_int8x8x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_int8x8_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int8x8x2_t r = { {
|
||||
simde_int8x8_from_private(r_[0]),
|
||||
simde_int8x8_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_s8
|
||||
#define vld2_s8(a) simde_vld2_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4x2_t
|
||||
simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_s16(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(simde_vld1q_s16(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7);
|
||||
simde_int16x4x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_int16x4_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int16x4x2_t r = { {
|
||||
simde_int16x4_from_private(r_[0]),
|
||||
simde_int16x4_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_s16
|
||||
#define vld2_s16(a) simde_vld2_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2x2_t
|
||||
simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_s32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(simde_vld1q_s32(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
|
||||
simde_int32x2x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_int32x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int32x2x2_t r = { {
|
||||
simde_int32x2_from_private(r_[0]),
|
||||
simde_int32x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_s32
|
||||
#define vld2_s32(a) simde_vld2_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1x2_t
|
||||
simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_s64(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(simde_vld1q_s64(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
|
||||
simde_int64x1x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_int64x1_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int64x1x2_t r = { {
|
||||
simde_int64x1_from_private(r_[0]),
|
||||
simde_int64x1_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_s64
|
||||
#define vld2_s64(a) simde_vld2_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8x2_t
|
||||
simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_u8(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
v128_t a = wasm_v128_load(ptr);
|
||||
simde_uint8x16_private q_;
|
||||
q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
simde_uint8x16_t q = simde_uint8x16_from_private(q_);
|
||||
|
||||
simde_uint8x8x2_t u = {
|
||||
simde_vget_low_u8(q),
|
||||
simde_vget_high_u8(q)
|
||||
};
|
||||
return u;
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vld1q_u8(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
simde_uint8x8x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_uint8x8_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint8x8x2_t r = { {
|
||||
simde_uint8x8_from_private(r_[0]),
|
||||
simde_uint8x8_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_u8
|
||||
#define vld2_u8(a) simde_vld2_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4x2_t
|
||||
simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_u16(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vld1q_u16(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7);
|
||||
simde_uint16x4x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_uint16x4_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint16x4x2_t r = { {
|
||||
simde_uint16x4_from_private(r_[0]),
|
||||
simde_uint16x4_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_u16
|
||||
#define vld2_u16(a) simde_vld2_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2x2_t
|
||||
simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_u32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vld1q_u32(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
|
||||
simde_uint32x2x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_uint32x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint32x2x2_t r = { {
|
||||
simde_uint32x2_from_private(r_[0]),
|
||||
simde_uint32x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_u32
|
||||
#define vld2_u32(a) simde_vld2_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1x2_t
|
||||
simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_u64(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_vld1q_u64(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
|
||||
simde_uint64x1x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_uint64x1_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint64x1x2_t r = { {
|
||||
simde_uint64x1_from_private(r_[0]),
|
||||
simde_uint64x1_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_u64
|
||||
#define vld2_u64(a) simde_vld2_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2x2_t
|
||||
simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2_f32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(simde_vld1q_f32(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3);
|
||||
simde_float32x2x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_float32x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float32x2x2_t r = { {
|
||||
simde_float32x2_from_private(r_[0]),
|
||||
simde_float32x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_f32
|
||||
#define vld2_f32(a) simde_vld2_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1x2_t
|
||||
simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld2_f64(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(simde_vld1q_f64(ptr));
|
||||
a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1);
|
||||
simde_float64x1x2_t r;
|
||||
simde_memcpy(&r, &a_, sizeof(r));
|
||||
return r;
|
||||
#else
|
||||
simde_float64x1_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float64x1x2_t r = { {
|
||||
simde_float64x1_from_private(r_[0]),
|
||||
simde_float64x1_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2_f64
|
||||
#define vld2_f64(a) simde_vld2_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16x2_t
|
||||
simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_s8(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_s8(
|
||||
simde_vld1q_s8(&(ptr[0])),
|
||||
simde_vld1q_s8(&(ptr[16]))
|
||||
);
|
||||
#else
|
||||
simde_int8x16_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int8x16x2_t r = { {
|
||||
simde_int8x16_from_private(r_[0]),
|
||||
simde_int8x16_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_s8
|
||||
#define vld2q_s8(a) simde_vld2q_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4x2_t
|
||||
simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_s32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_s32(
|
||||
simde_vld1q_s32(&(ptr[0])),
|
||||
simde_vld1q_s32(&(ptr[4]))
|
||||
);
|
||||
#else
|
||||
simde_int32x4_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int32x4x2_t r = { {
|
||||
simde_int32x4_from_private(r_[0]),
|
||||
simde_int32x4_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_s32
|
||||
#define vld2q_s32(a) simde_vld2q_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8x2_t
|
||||
simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_s16(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_s16(
|
||||
simde_vld1q_s16(&(ptr[0])),
|
||||
simde_vld1q_s16(&(ptr[8]))
|
||||
);
|
||||
#else
|
||||
simde_int16x8_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int16x8x2_t r = { {
|
||||
simde_int16x8_from_private(r_[0]),
|
||||
simde_int16x8_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_s16
|
||||
#define vld2q_s16(a) simde_vld2q_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2x2_t
|
||||
simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld2q_s64(ptr);
|
||||
#else
|
||||
simde_int64x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int64x2x2_t r = { {
|
||||
simde_int64x2_from_private(r_[0]),
|
||||
simde_int64x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_s64
|
||||
#define vld2q_s64(a) simde_vld2q_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x2_t
|
||||
simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_u8(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_u8(
|
||||
simde_vld1q_u8(&(ptr[ 0])),
|
||||
simde_vld1q_u8(&(ptr[16]))
|
||||
);
|
||||
#else
|
||||
simde_uint8x16_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint8x16x2_t r = { {
|
||||
simde_uint8x16_from_private(r_[0]),
|
||||
simde_uint8x16_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_u8
|
||||
#define vld2q_u8(a) simde_vld2q_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8x2_t
|
||||
simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_u16(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_u16(
|
||||
simde_vld1q_u16(&(ptr[0])),
|
||||
simde_vld1q_u16(&(ptr[8]))
|
||||
);
|
||||
#else
|
||||
simde_uint16x8_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint16x8x2_t r = { {
|
||||
simde_uint16x8_from_private(r_[0]),
|
||||
simde_uint16x8_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_u16
|
||||
#define vld2q_u16(a) simde_vld2q_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4x2_t
|
||||
simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_u32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_u32(
|
||||
simde_vld1q_u32(&(ptr[0])),
|
||||
simde_vld1q_u32(&(ptr[4]))
|
||||
);
|
||||
#else
|
||||
simde_uint32x4_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint32x4x2_t r = { {
|
||||
simde_uint32x4_from_private(r_[0]),
|
||||
simde_uint32x4_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_u32
|
||||
#define vld2q_u32(a) simde_vld2q_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2x2_t
|
||||
simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld2q_u64(ptr);
|
||||
#else
|
||||
simde_uint64x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint64x2x2_t r = { {
|
||||
simde_uint64x2_from_private(r_[0]),
|
||||
simde_uint64x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_u64
|
||||
#define vld2q_u64(a) simde_vld2q_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4x2_t
|
||||
simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld2q_f32(ptr);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
|
||||
return
|
||||
simde_vuzpq_f32(
|
||||
simde_vld1q_f32(&(ptr[0])),
|
||||
simde_vld1q_f32(&(ptr[4]))
|
||||
);
|
||||
#else
|
||||
simde_float32x4_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float32x4x2_t r = { {
|
||||
simde_float32x4_from_private(r_[0]),
|
||||
simde_float32x4_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_f32
|
||||
#define vld2q_f32(a) simde_vld2q_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2x2_t
|
||||
simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld2q_f64(ptr);
|
||||
#else
|
||||
simde_float64x2_private r_[2];
|
||||
|
||||
for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float64x2x2_t r = { {
|
||||
simde_float64x2_from_private(r_[0]),
|
||||
simde_float64x2_from_private(r_[1]),
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld2q_f64
|
||||
#define vld2q_f64(a) simde_vld2q_f64((a))
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD2_H) */
|
||||
609
lib/simd_wrapper/simde/arm/neon/ld3.h
Normal file
609
lib/simd_wrapper/simde/arm/neon/ld3.h
Normal file
@@ -0,0 +1,609 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD3_H)
|
||||
#define SIMDE_ARM_NEON_LD3_H
|
||||
|
||||
#include "types.h"
|
||||
#include "ld1.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
|
||||
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
|
||||
#endif
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
#if !defined(SIMDE_BUG_INTEL_857088)
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2x3_t
|
||||
simde_vld3_f32(simde_float32 const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_f32(ptr);
|
||||
#else
|
||||
simde_float32x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float32x2x3_t r = { {
|
||||
simde_float32x2_from_private(r_[0]),
|
||||
simde_float32x2_from_private(r_[1]),
|
||||
simde_float32x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_f32
|
||||
#define vld3_f32(a) simde_vld3_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1x3_t
|
||||
simde_vld3_f64(simde_float64 const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld3_f64(ptr);
|
||||
#else
|
||||
simde_float64x1_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float64x1x3_t r = { {
|
||||
simde_float64x1_from_private(r_[0]),
|
||||
simde_float64x1_from_private(r_[1]),
|
||||
simde_float64x1_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_f64
|
||||
#define vld3_f64(a) simde_vld3_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8x3_t
|
||||
simde_vld3_s8(int8_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_s8(ptr);
|
||||
#else
|
||||
simde_int8x8_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int8x8x3_t r = { {
|
||||
simde_int8x8_from_private(r_[0]),
|
||||
simde_int8x8_from_private(r_[1]),
|
||||
simde_int8x8_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_s8
|
||||
#define vld3_s8(a) simde_vld3_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4x3_t
|
||||
simde_vld3_s16(int16_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_s16(ptr);
|
||||
#else
|
||||
simde_int16x4_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int16x4x3_t r = { {
|
||||
simde_int16x4_from_private(r_[0]),
|
||||
simde_int16x4_from_private(r_[1]),
|
||||
simde_int16x4_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_s16
|
||||
#define vld3_s16(a) simde_vld3_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2x3_t
|
||||
simde_vld3_s32(int32_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_s32(ptr);
|
||||
#else
|
||||
simde_int32x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int32x2x3_t r = { {
|
||||
simde_int32x2_from_private(r_[0]),
|
||||
simde_int32x2_from_private(r_[1]),
|
||||
simde_int32x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_s32
|
||||
#define vld3_s32(a) simde_vld3_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1x3_t
|
||||
simde_vld3_s64(int64_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_s64(ptr);
|
||||
#else
|
||||
simde_int64x1_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int64x1x3_t r = { {
|
||||
simde_int64x1_from_private(r_[0]),
|
||||
simde_int64x1_from_private(r_[1]),
|
||||
simde_int64x1_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_s64
|
||||
#define vld3_s64(a) simde_vld3_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8x3_t
|
||||
simde_vld3_u8(uint8_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_u8(ptr);
|
||||
#else
|
||||
simde_uint8x8_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint8x8x3_t r = { {
|
||||
simde_uint8x8_from_private(r_[0]),
|
||||
simde_uint8x8_from_private(r_[1]),
|
||||
simde_uint8x8_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_u8
|
||||
#define vld3_u8(a) simde_vld3_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4x3_t
|
||||
simde_vld3_u16(uint16_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_u16(ptr);
|
||||
#else
|
||||
simde_uint16x4_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint16x4x3_t r = { {
|
||||
simde_uint16x4_from_private(r_[0]),
|
||||
simde_uint16x4_from_private(r_[1]),
|
||||
simde_uint16x4_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_u16
|
||||
#define vld3_u16(a) simde_vld3_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2x3_t
|
||||
simde_vld3_u32(uint32_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_u32(ptr);
|
||||
#else
|
||||
simde_uint32x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint32x2x3_t r = { {
|
||||
simde_uint32x2_from_private(r_[0]),
|
||||
simde_uint32x2_from_private(r_[1]),
|
||||
simde_uint32x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_u32
|
||||
#define vld3_u32(a) simde_vld3_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1x3_t
|
||||
simde_vld3_u64(uint64_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3_u64(ptr);
|
||||
#else
|
||||
simde_uint64x1_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint64x1x3_t r = { {
|
||||
simde_uint64x1_from_private(r_[0]),
|
||||
simde_uint64x1_from_private(r_[1]),
|
||||
simde_uint64x1_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3_u64
|
||||
#define vld3_u64(a) simde_vld3_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4x3_t
|
||||
simde_vld3q_f32(simde_float32 const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_f32(ptr);
|
||||
#else
|
||||
simde_float32x4_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float32x4x3_t r = { {
|
||||
simde_float32x4_from_private(r_[0]),
|
||||
simde_float32x4_from_private(r_[1]),
|
||||
simde_float32x4_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_f32
|
||||
#define vld3q_f32(a) simde_vld3q_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2x3_t
|
||||
simde_vld3q_f64(simde_float64 const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld3q_f64(ptr);
|
||||
#else
|
||||
simde_float64x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_float64x2x3_t r = { {
|
||||
simde_float64x2_from_private(r_[0]),
|
||||
simde_float64x2_from_private(r_[1]),
|
||||
simde_float64x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_f64
|
||||
#define vld3q_f64(a) simde_vld3q_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16x3_t
|
||||
simde_vld3q_s8(int8_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_s8(ptr);
|
||||
#else
|
||||
simde_int8x16_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int8x16x3_t r = { {
|
||||
simde_int8x16_from_private(r_[0]),
|
||||
simde_int8x16_from_private(r_[1]),
|
||||
simde_int8x16_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_s8
|
||||
#define vld3q_s8(a) simde_vld3q_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8x3_t
|
||||
simde_vld3q_s16(int16_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_s16(ptr);
|
||||
#else
|
||||
simde_int16x8_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int16x8x3_t r = { {
|
||||
simde_int16x8_from_private(r_[0]),
|
||||
simde_int16x8_from_private(r_[1]),
|
||||
simde_int16x8_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_s16
|
||||
#define vld3q_s16(a) simde_vld3q_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4x3_t
|
||||
simde_vld3q_s32(int32_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_s32(ptr);
|
||||
#else
|
||||
simde_int32x4_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int32x4x3_t r = { {
|
||||
simde_int32x4_from_private(r_[0]),
|
||||
simde_int32x4_from_private(r_[1]),
|
||||
simde_int32x4_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_s32
|
||||
#define vld3q_s32(a) simde_vld3q_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2x3_t
|
||||
simde_vld3q_s64(int64_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld3q_s64(ptr);
|
||||
#else
|
||||
simde_int64x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_int64x2x3_t r = { {
|
||||
simde_int64x2_from_private(r_[0]),
|
||||
simde_int64x2_from_private(r_[1]),
|
||||
simde_int64x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_s64
|
||||
#define vld3q_s64(a) simde_vld3q_s64((a))
|
||||
#endif
|
||||
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x3_t
|
||||
simde_vld3q_u8(uint8_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_u8(ptr);
|
||||
#else
|
||||
simde_uint8x16_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint8x16x3_t r = { {
|
||||
simde_uint8x16_from_private(r_[0]),
|
||||
simde_uint8x16_from_private(r_[1]),
|
||||
simde_uint8x16_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_u8
|
||||
#define vld3q_u8(a) simde_vld3q_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8x3_t
|
||||
simde_vld3q_u16(uint16_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_u16(ptr);
|
||||
#else
|
||||
simde_uint16x8_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint16x8x3_t r = { {
|
||||
simde_uint16x8_from_private(r_[0]),
|
||||
simde_uint16x8_from_private(r_[1]),
|
||||
simde_uint16x8_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_u16
|
||||
#define vld3q_u16(a) simde_vld3q_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4x3_t
|
||||
simde_vld3q_u32(uint32_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld3q_u32(ptr);
|
||||
#else
|
||||
simde_uint32x4_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint32x4x3_t r = { {
|
||||
simde_uint32x4_from_private(r_[0]),
|
||||
simde_uint32x4_from_private(r_[1]),
|
||||
simde_uint32x4_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_u32
|
||||
#define vld3q_u32(a) simde_vld3q_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2x3_t
|
||||
simde_vld3q_u64(uint64_t const *ptr) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld3q_u64(ptr);
|
||||
#else
|
||||
simde_uint64x2_private r_[3];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) {
|
||||
for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) {
|
||||
r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))];
|
||||
}
|
||||
}
|
||||
|
||||
simde_uint64x2x3_t r = { {
|
||||
simde_uint64x2_from_private(r_[0]),
|
||||
simde_uint64x2_from_private(r_[1]),
|
||||
simde_uint64x2_from_private(r_[2])
|
||||
} };
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld3q_u64
|
||||
#define vld3q_u64(a) simde_vld3q_u64((a))
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD3_H) */
|
||||
486
lib/simd_wrapper/simde/arm/neon/ld4.h
Normal file
486
lib/simd_wrapper/simde/arm/neon/ld4.h
Normal file
@@ -0,0 +1,486 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD4_H)
|
||||
#define SIMDE_ARM_NEON_LD4_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
#if HEDLEY_GCC_VERSION_CHECK(7,0,0)
|
||||
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
|
||||
#endif
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
#if !defined(SIMDE_BUG_INTEL_857088)
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2x4_t
|
||||
simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_f32(ptr);
|
||||
#else
|
||||
simde_float32x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]),
|
||||
simde_float32x2_from_private(a_[2]), simde_float32x2_from_private(a_[3]) } };
|
||||
return (s_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_f32
|
||||
#define vld4_f32(a) simde_vld4_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1x4_t
|
||||
simde_vld4_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld4_f64(ptr);
|
||||
#else
|
||||
simde_float64x1_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]),
|
||||
simde_float64x1_from_private(a_[2]), simde_float64x1_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_f64
|
||||
#define vld4_f64(a) simde_vld4_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8x4_t
|
||||
simde_vld4_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_s8(ptr);
|
||||
#else
|
||||
simde_int8x8_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]),
|
||||
simde_int8x8_from_private(a_[2]), simde_int8x8_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_s8
|
||||
#define vld4_s8(a) simde_vld4_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4x4_t
|
||||
simde_vld4_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_s16(ptr);
|
||||
#else
|
||||
simde_int16x4_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]),
|
||||
simde_int16x4_from_private(a_[2]), simde_int16x4_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_s16
|
||||
#define vld4_s16(a) simde_vld4_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2x4_t
|
||||
simde_vld4_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_s32(ptr);
|
||||
#else
|
||||
simde_int32x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]),
|
||||
simde_int32x2_from_private(a_[2]), simde_int32x2_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_s32
|
||||
#define vld4_s32(a) simde_vld4_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1x4_t
|
||||
simde_vld4_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_s64(ptr);
|
||||
#else
|
||||
simde_int64x1_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]),
|
||||
simde_int64x1_from_private(a_[2]), simde_int64x1_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_s64
|
||||
#define vld4_s64(a) simde_vld4_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8x4_t
|
||||
simde_vld4_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_u8(ptr);
|
||||
#else
|
||||
simde_uint8x8_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]),
|
||||
simde_uint8x8_from_private(a_[2]), simde_uint8x8_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_u8
|
||||
#define vld4_u8(a) simde_vld4_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4x4_t
|
||||
simde_vld4_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_u16(ptr);
|
||||
#else
|
||||
simde_uint16x4_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]),
|
||||
simde_uint16x4_from_private(a_[2]), simde_uint16x4_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_u16
|
||||
#define vld4_u16(a) simde_vld4_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2x4_t
|
||||
simde_vld4_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_u32(ptr);
|
||||
#else
|
||||
simde_uint32x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]),
|
||||
simde_uint32x2_from_private(a_[2]), simde_uint32x2_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_u32
|
||||
#define vld4_u32(a) simde_vld4_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1x4_t
|
||||
simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4_u64(ptr);
|
||||
#else
|
||||
simde_uint64x1_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]),
|
||||
simde_uint64x1_from_private(a_[2]), simde_uint64x1_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_u64
|
||||
#define vld4_u64(a) simde_vld4_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4x4_t
|
||||
simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_f32(ptr);
|
||||
#else
|
||||
simde_float32x4_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]),
|
||||
simde_float32x4_from_private(a_[2]), simde_float32x4_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_f32
|
||||
#define vld4q_f32(a) simde_vld4q_f32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2x4_t
|
||||
simde_vld4q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld4q_f64(ptr);
|
||||
#else
|
||||
simde_float64x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]),
|
||||
simde_float64x2_from_private(a_[2]), simde_float64x2_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_f64
|
||||
#define vld4q_f64(a) simde_vld4q_f64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16x4_t
|
||||
simde_vld4q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_s8(ptr);
|
||||
#else
|
||||
simde_int8x16_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), simde_int8x16_from_private(a_[1]),
|
||||
simde_int8x16_from_private(a_[2]), simde_int8x16_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_s8
|
||||
#define vld4q_s8(a) simde_vld4q_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8x4_t
|
||||
simde_vld4q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_s16(ptr);
|
||||
#else
|
||||
simde_int16x8_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]),
|
||||
simde_int16x8_from_private(a_[2]), simde_int16x8_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_s16
|
||||
#define vld4q_s16(a) simde_vld4q_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4x4_t
|
||||
simde_vld4q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_s32(ptr);
|
||||
#else
|
||||
simde_int32x4_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]),
|
||||
simde_int32x4_from_private(a_[2]), simde_int32x4_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_s32
|
||||
#define vld4q_s32(a) simde_vld4q_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2x4_t
|
||||
simde_vld4q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld4q_s64(ptr);
|
||||
#else
|
||||
simde_int64x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), simde_int64x2_from_private(a_[1]),
|
||||
simde_int64x2_from_private(a_[2]), simde_int64x2_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_s64
|
||||
#define vld4q_s64(a) simde_vld4q_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x4_t
|
||||
simde_vld4q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_u8(ptr);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
// Let a, b, c, d be the 4 uint8x16 to return, they are laid out in memory:
|
||||
// [a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3,
|
||||
// a4, b4, c4, d4, a5, b5, c5, d5, a6, b6, c6, d6, a7, b7, c7, d7,
|
||||
// a8, b8, c8, d8, a9, b9, c9, d9, a10, b10, c10, d10, a11, b11, c11, d11,
|
||||
// a12, b12, c12, d12, a13, b13, c13, d13, a14, b14, c14, d14, a15, b15, c15, d15]
|
||||
v128_t a_ = wasm_v128_load(&ptr[0]);
|
||||
v128_t b_ = wasm_v128_load(&ptr[16]);
|
||||
v128_t c_ = wasm_v128_load(&ptr[32]);
|
||||
v128_t d_ = wasm_v128_load(&ptr[48]);
|
||||
|
||||
v128_t a_low_b_low = wasm_i8x16_shuffle(a_, b_, 0, 4, 8, 12, 16, 20, 24, 28,
|
||||
1, 5, 9, 13, 17, 21, 25, 29);
|
||||
v128_t a_high_b_high = wasm_i8x16_shuffle(c_, d_, 0, 4, 8, 12, 16, 20, 24,
|
||||
28, 1, 5, 9, 13, 17, 21, 25, 29);
|
||||
v128_t a = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
|
||||
v128_t b = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 8, 9, 10, 11, 12,
|
||||
13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
|
||||
v128_t c_low_d_low = wasm_i8x16_shuffle(a_, b_, 2, 6, 10, 14, 18, 22, 26,
|
||||
30, 3, 7, 11, 15, 19, 23, 27, 31);
|
||||
v128_t c_high_d_high = wasm_i8x16_shuffle(c_, d_, 2, 6, 10, 14, 18, 22, 26,
|
||||
30, 3, 7, 11, 15, 19, 23, 27, 31);
|
||||
v128_t c = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 0, 1, 2, 3, 4, 5,
|
||||
6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
|
||||
v128_t d = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 8, 9, 10, 11, 12,
|
||||
13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
|
||||
simde_uint8x16_private r_[4];
|
||||
r_[0].v128 = a;
|
||||
r_[1].v128 = b;
|
||||
r_[2].v128 = c;
|
||||
r_[3].v128 = d;
|
||||
simde_uint8x16x4_t s_ = {{simde_uint8x16_from_private(r_[0]),
|
||||
simde_uint8x16_from_private(r_[1]),
|
||||
simde_uint8x16_from_private(r_[2]),
|
||||
simde_uint8x16_from_private(r_[3])}};
|
||||
return s_;
|
||||
#else
|
||||
simde_uint8x16_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]),
|
||||
simde_uint8x16_from_private(a_[2]), simde_uint8x16_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_u8
|
||||
#define vld4q_u8(a) simde_vld4q_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8x4_t
|
||||
simde_vld4q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_u16(ptr);
|
||||
#else
|
||||
simde_uint16x8_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]),
|
||||
simde_uint16x8_from_private(a_[2]), simde_uint16x8_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_u16
|
||||
#define vld4q_u16(a) simde_vld4q_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4x4_t
|
||||
simde_vld4q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vld4q_u32(ptr);
|
||||
#else
|
||||
simde_uint32x4_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]),
|
||||
simde_uint32x4_from_private(a_[2]), simde_uint32x4_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_u32
|
||||
#define vld4q_u32(a) simde_vld4q_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2x4_t
|
||||
simde_vld4q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vld4q_u64(ptr);
|
||||
#else
|
||||
simde_uint64x2_private a_[4];
|
||||
for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 4 ; i++) {
|
||||
a_[i % 4].values[i / 4] = ptr[i];
|
||||
}
|
||||
simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]),
|
||||
simde_uint64x2_from_private(a_[2]), simde_uint64x2_from_private(a_[3]) } };
|
||||
return s_;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_u64
|
||||
#define vld4q_u64(a) simde_vld4q_u64((a))
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD4_H) */
|
||||
593
lib/simd_wrapper/simde/arm/neon/ld4_lane.h
Normal file
593
lib/simd_wrapper/simde/arm/neon/ld4_lane.h
Normal file
@@ -0,0 +1,593 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Zhi An Ng <zhin@google.com> (Copyright owned by Google, LLC)
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
/* In older versions of clang, __builtin_neon_vld4_lane_v would
|
||||
* generate a diagnostic for most variants (those which didn't
|
||||
* use signed 8-bit integers). I believe this was fixed by
|
||||
* 78ad22e0cc6390fcd44b2b7b5132f1b960ff975d.
|
||||
*
|
||||
* Since we have to use macros (due to the immediate-mode parameter)
|
||||
* we can't just disable it once in this file; we have to use statement
|
||||
* exprs and push / pop the stack for each macro. */
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_LD4_LANE_H)
|
||||
#define SIMDE_ARM_NEON_LD4_LANE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
#if !defined(SIMDE_BUG_INTEL_857088)
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8x4_t
|
||||
simde_vld4_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x8x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_int8x8x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int8x8_private tmp_ = simde_int8x8_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int8x8_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_s8(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s8(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_s8(ptr, src, lane) vld4_lane_s8(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_s8
|
||||
#define vld4_lane_s8(ptr, src, lane) simde_vld4_lane_s8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4x4_t
|
||||
simde_vld4_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int16x4x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int16x4_private tmp_ = simde_int16x4_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int16x4_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_s16(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s16(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_s16(ptr, src, lane) vld4_lane_s16(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_s16
|
||||
#define vld4_lane_s16(ptr, src, lane) simde_vld4_lane_s16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2x4_t
|
||||
simde_vld4_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int32x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int32x2_private tmp_ = simde_int32x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int32x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_s32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_s32(ptr, src, lane) vld4_lane_s32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_s32
|
||||
#define vld4_lane_s32(ptr, src, lane) simde_vld4_lane_s32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1x4_t
|
||||
simde_vld4_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_int64x1x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int64x1_private tmp_ = simde_int64x1_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int64x1_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_s64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_s64(ptr, src, lane) vld4_lane_s64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_s64
|
||||
#define vld4_lane_s64(ptr, src, lane) simde_vld4_lane_s64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8x4_t
|
||||
simde_vld4_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x8x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_uint8x8x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint8x8_private tmp_ = simde_uint8x8_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint8x8_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_u8(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u8(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_u8(ptr, src, lane) vld4_lane_u8(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_u8
|
||||
#define vld4_lane_u8(ptr, src, lane) simde_vld4_lane_u8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4x4_t
|
||||
simde_vld4_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint16x4x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint16x4_private tmp_ = simde_uint16x4_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint16x4_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_u16(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u16(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_u16(ptr, src, lane) vld4_lane_u16(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_u16
|
||||
#define vld4_lane_u16(ptr, src, lane) simde_vld4_lane_u16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2x4_t
|
||||
simde_vld4_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint32x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint32x2_private tmp_ = simde_uint32x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint32x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_u32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_u32(ptr, src, lane) vld4_lane_u32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_u32
|
||||
#define vld4_lane_u32(ptr, src, lane) simde_vld4_lane_u32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1x4_t
|
||||
simde_vld4_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_uint64x1x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint64x1_private tmp_ = simde_uint64x1_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint64x1_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_u64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_u64(ptr, src, lane) vld4_lane_u64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_u64
|
||||
#define vld4_lane_u64(ptr, src, lane) simde_vld4_lane_u64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2x4_t
|
||||
simde_vld4_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float32x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_float32x2_private tmp_ = simde_float32x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_float32x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_f32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_f32(ptr, src, lane) vld4_lane_f32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_f32
|
||||
#define vld4_lane_f32(ptr, src, lane) simde_vld4_lane_f32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1x4_t
|
||||
simde_vld4_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
|
||||
simde_float64x1x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_float64x1_private tmp_ = simde_float64x1_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_float64x1_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4_lane_f64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4_lane_f64(ptr, src, lane) vld4_lane_f64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4_lane_f64
|
||||
#define vld4_lane_f64(ptr, src, lane) simde_vld4_lane_f64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16x4_t
|
||||
simde_vld4q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x16x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
simde_int8x16x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int8x16_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_s8(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s8(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_s8(ptr, src, lane) vld4q_lane_s8(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_s8
|
||||
#define vld4q_lane_s8(ptr, src, lane) simde_vld4q_lane_s8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8x4_t
|
||||
simde_vld4q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x8x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_int16x8x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int16x8_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_s16(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s16(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_s16(ptr, src, lane) vld4q_lane_s16(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_s16
|
||||
#define vld4q_lane_s16(ptr, src, lane) simde_vld4q_lane_s16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4x4_t
|
||||
simde_vld4q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_int32x4x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int32x4_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_s32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_s32(ptr, src, lane) vld4q_lane_s32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_s32
|
||||
#define vld4q_lane_s32(ptr, src, lane) simde_vld4q_lane_s32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2x4_t
|
||||
simde_vld4q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_int64x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_int64x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_s64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_s64(ptr, src, lane) vld4q_lane_s64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_s64
|
||||
#define vld4q_lane_s64(ptr, src, lane) simde_vld4q_lane_s64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16x4_t
|
||||
simde_vld4q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x16x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
|
||||
simde_uint8x16x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint8x16_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_u8(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u8(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_u8(ptr, src, lane) vld4q_lane_u8(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_u8
|
||||
#define vld4q_lane_u8(ptr, src, lane) simde_vld4q_lane_u8((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8x4_t
|
||||
simde_vld4q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x8x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
|
||||
simde_uint16x8x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint16x8_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_u16(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u16(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_u16
|
||||
#define vld4q_lane_u16(ptr, src, lane) simde_vld4q_lane_u16((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4x4_t
|
||||
simde_vld4q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_uint32x4x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint32x4_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_u32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_u32
|
||||
#define vld4q_lane_u32(ptr, src, lane) simde_vld4q_lane_u32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2x4_t
|
||||
simde_vld4q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_uint64x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_uint64x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_u64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_u64(ptr, src, lane) vld4q_lane_u64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_u64
|
||||
#define vld4q_lane_u64(ptr, src, lane) simde_vld4q_lane_u64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4x4_t
|
||||
simde_vld4q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
|
||||
simde_float32x4x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_float32x4_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_f32(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f32(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_f32(ptr, src, lane) vld4q_lane_f32(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_f32
|
||||
#define vld4q_lane_f32(ptr, src, lane) simde_vld4q_lane_f32((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2x4_t
|
||||
simde_vld4q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x4_t src, const int lane)
|
||||
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
|
||||
simde_float64x2x4_t r;
|
||||
|
||||
for (size_t i = 0 ; i < 4 ; i++) {
|
||||
simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]);
|
||||
tmp_.values[lane] = ptr[i];
|
||||
r.val[i] = simde_float64x2_from_private(tmp_);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)
|
||||
#define simde_vld4q_lane_f64(ptr, src, lane) \
|
||||
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f64(ptr, src, lane))
|
||||
#else
|
||||
#define simde_vld4q_lane_f64(ptr, src, lane) vld4q_lane_f64(ptr, src, lane)
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vld4q_lane_f64
|
||||
#define vld4q_lane_f64(ptr, src, lane) simde_vld4q_lane_f64((ptr), (src), (lane))
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_BUG_INTEL_857088) */
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_LD4_LANE_H) */
|
||||
624
lib/simd_wrapper/simde/arm/neon/max.h
Normal file
624
lib/simd_wrapper/simde/arm/neon/max.h
Normal file
@@ -0,0 +1,624 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MAX_H)
|
||||
#define SIMDE_ARM_NEON_MAX_H
|
||||
|
||||
#include "types.h"
|
||||
#include "cgt.h"
|
||||
#include "bsl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_f32(a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
|
||||
#else
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_f32
|
||||
#define vmax_f32(a, b) simde_vmax_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmax_f64(a, b);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
|
||||
#else
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_f64
|
||||
#define vmax_f64(a, b) simde_vmax_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_s8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s8(simde_vcgt_s8(a, b), a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_s8
|
||||
#define vmax_s8(a, b) simde_vmax_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_s16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s16(simde_vcgt_s16(a, b), a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_s16
|
||||
#define vmax_s16(a, b) simde_vmax_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_s32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s32(simde_vcgt_s32(a, b), a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_s32
|
||||
#define vmax_s32(a, b) simde_vmax_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s64(simde_vcgt_s64(a, b), a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_u8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u8(simde_vcgt_u8(a, b), a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_u8
|
||||
#define vmax_u8(a, b) simde_vmax_u8((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise unsigned 16-bit maximum of two uint16x4 vectors.
 * Note the SSE2 exclusion in the vbsl branch: with SSE2 the MMX
 * saturating-subtract trick below is preferred. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmax_u16(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE)
    return simde_vbsl_u16(simde_vcgt_u16(a, b), a, b);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* b + sat(a - b) == max(a, b) for unsigned values: the saturating
       * subtract yields 0 when a <= b and (a - b) otherwise.
       * https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
      r_.m64 = _mm_add_pi16(b_.m64, _mm_subs_pu16(a_.m64, b_.m64));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmax_u16
  #define vmax_u16(a, b) simde_vmax_u16((a), (b))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmax_u32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u32(simde_vcgt_u32(a, b), a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmax_u32
|
||||
#define vmax_u32(a, b) simde_vmax_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u64(simde_vcgt_u64(a, b), a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Lane-wise maximum of two float32x4 vectors.  Unless SIMDE_FAST_NANS is
 * defined, an unordered lane (either operand NaN) yields NaN, matching
 * Arm's vmaxq_f32; plain _mm_max_ps/vec_max do NOT guarantee this, hence
 * the explicit mask construction on the non-NEON paths. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_f32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    /* vec_orc(x, y) == x | ~y; vec_cmpeq(a, a) is false only for NaN
     * lanes of a, so the selector is (a > b) | isnan(a): pick a when it
     * is greater or NaN. */
    return
      vec_sel(
        b,
        a,
        vec_orc(
          vec_cmpgt(a, b),
          vec_cmpeq(a, a)
        )
      );
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* P6 lacks vec_orc; build ~cmpeq via vec_nor(x, x) instead. */
    SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) cmpres = vec_cmpeq(a, a);
    return
      vec_sel(
        b,
        a,
        vec_or(
          vec_cmpgt(a, b),
          vec_nor(cmpres, cmpres)
        )
      );
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);

    #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.m128 = _mm_max_ps(a_.m128, b_.m128);
    #elif defined(SIMDE_X86_SSE_NATIVE)
      /* Mask is true where a is NaN (cmpneq with itself) or a > b;
       * blend then picks a for those lanes, b otherwise. */
      __m128 m = _mm_or_ps(_mm_cmpneq_ps(a_.m128, a_.m128), _mm_cmpgt_ps(a_.m128, b_.m128));
      #if defined(SIMDE_X86_SSE4_1_NATIVE)
        r_.m128 = _mm_blendv_ps(b_.m128, a_.m128, m);
      #else
        /* Pre-SSE4.1 blend: (m & a) | (~m & b) */
        r_.m128 =
          _mm_or_ps(
            _mm_and_ps(m, a_.m128),
            _mm_andnot_ps(m, b_.m128)
          );
      #endif
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_max(a_.v128, b_.v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        #if !defined(SIMDE_FAST_NANS)
          /* Both comparisons false => unordered => NaN result. */
          r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
        #else
          r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
        #endif
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_f32
  #define vmaxq_f32(a, b) simde_vmaxq_f32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise maximum of two float64x2 vectors (AArch64 vmaxq_f64).
 * Unless SIMDE_FAST_NANS is defined, an unordered lane yields NaN;
 * _mm_max_pd/vec_max alone do not guarantee that, hence the masking. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmaxq_f64(a, b);
  #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
    /* Only valid under FAST_NANS: vec_max's NaN behaviour is not the
     * NEON behaviour. */
    return vec_max(a, b);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.m128d = _mm_max_pd(a_.m128d, b_.m128d);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* Mask true where a is NaN (cmpneq with itself) or a > b. */
      __m128d m = _mm_or_pd(_mm_cmpneq_pd(a_.m128d, a_.m128d), _mm_cmpgt_pd(a_.m128d, b_.m128d));
      #if defined(SIMDE_X86_SSE4_1_NATIVE)
        r_.m128d = _mm_blendv_pd(b_.m128d, a_.m128d, m);
      #else
        /* Pre-SSE4.1 blend: (m & a) | (~m & b) */
        r_.m128d =
          _mm_or_pd(
            _mm_and_pd(m, a_.m128d),
            _mm_andnot_pd(m, b_.m128d)
          );
      #endif
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_max(a_.v128, b_.v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        #if !defined(SIMDE_FAST_NANS)
          /* Both comparisons false => unordered => NaN result. */
          r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
        #else
          r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
        #endif
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_f64
  #define vmaxq_f64(a, b) simde_vmaxq_f64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise signed 8-bit maximum of two int8x16 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE2_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.m128i = _mm_max_epi8(a_.m128i, b_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 has no epi8 max: blend via the (a > b) mask,
       * r = (m & a) | (~m & b). */
      __m128i m = _mm_cmpgt_epi8(a_.m128i, b_.m128i);
      r_.m128i = _mm_or_si128(_mm_and_si128(m, a_.m128i), _mm_andnot_si128(m, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_max(a_.v128, b_.v128);
    #endif

    return simde_int8x16_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_s8
  #define vmaxq_s8(a, b) simde_vmaxq_s8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise signed 16-bit maximum of two int16x8 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_s16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE2_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* epi16 max exists since SSE2, unlike epi8/epi32. */
      r_.m128i = _mm_max_epi16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_max(a_.v128, b_.v128);
    #endif

    return simde_int16x8_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_s16
  #define vmaxq_s16(a, b) simde_vmaxq_s16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise signed 32-bit maximum of two int32x4 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_s32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE4_1_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* _mm_max_epi32 requires SSE4.1; SSE2 falls through to vbsl below. */
      r_.m128i = _mm_max_epi32(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_max(a_.v128, b_.v128);
    #endif

    return simde_int32x4_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_s32
  #define vmaxq_s32(a, b) simde_vmaxq_s32((a), (b))
#endif
|
||||
|
||||
/* Internal helper (simde_x_ prefix): lane-wise signed 64-bit maximum.
 * NEON has no vmaxq_s64, so this is not exposed as a native alias. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b);
  #endif
}
|
||||
|
||||
/* Lane-wise unsigned 8-bit maximum of two uint8x16 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE2_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* epu8 max exists since SSE2 (unlike epu16/epu32). */
      r_.m128i = _mm_max_epu8(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u8x16_max(a_.v128, b_.v128);
    #endif

    return simde_uint8x16_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_u8
  #define vmaxq_u8(a, b) simde_vmaxq_u8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned 16-bit maximum of two uint16x8 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE2_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.m128i = _mm_max_epu16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 lacks epu16 max: b + sat(a - b) == max(a, b) for unsigned.
       * https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
      r_.m128i = _mm_add_epi16(b_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u16x8_max(a_.v128, b_.v128);
    #endif

    return simde_uint16x8_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_u16
  #define vmaxq_u16(a, b) simde_vmaxq_u16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise unsigned 32-bit maximum of two uint32x4 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmaxq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #elif \
      defined(SIMDE_X86_SSE4_1_NATIVE) || \
      defined(SIMDE_WASM_SIMD128_NATIVE)
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* _mm_max_epu32 requires SSE4.1; SSE2 falls through to vbsl below. */
      r_.m128i = _mm_max_epu32(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u32x4_max(a_.v128, b_.v128);
    #endif

    return simde_uint32x4_from_private(r_);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmaxq_u32
  #define vmaxq_u32(a, b) simde_vmaxq_u32((a), (b))
#endif
|
||||
|
||||
/* Internal helper (simde_x_ prefix): lane-wise unsigned 64-bit maximum.
 * NEON has no vmaxq_u64, so this is not exposed as a native alias. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_max(a, b);
  #else
    /* Portable fallback: compare + bit-select. */
    return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b);
  #endif
}
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MAX_H) */
|
||||
217
lib/simd_wrapper/simde/arm/neon/maxnm.h
Normal file
217
lib/simd_wrapper/simde/arm/neon/maxnm.h
Normal file
@@ -0,0 +1,217 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MAXNM_H)
|
||||
#define SIMDE_ARM_NEON_MAXNM_H
|
||||
|
||||
#include "types.h"
|
||||
#include "cge.h"
|
||||
#include "bsl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmaxnm_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
|
||||
return vmaxnm_f32(a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if defined(simde_math_fmaxf)
|
||||
r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]);
|
||||
#else
|
||||
if (a_.values[i] > b_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (a_.values[i] < b_.values[i]) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else if (a_.values[i] == a_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else {
|
||||
r_.values[i] = b_.values[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxnm_f32
|
||||
#define vmaxnm_f32(a, b) simde_vmaxnm_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmaxnm_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmaxnm_f64(a, b);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if defined(simde_math_fmax)
|
||||
r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]);
|
||||
#else
|
||||
if (a_.values[i] > b_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (a_.values[i] < b_.values[i]) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else if (a_.values[i] == a_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else {
|
||||
r_.values[i] = b_.values[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxnm_f64
|
||||
#define vmaxnm_f64(a, b) simde_vmaxnm_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vmaxnmq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
|
||||
return vmaxnmq_f32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_max(a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_,
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE_NATIVE)
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
__m128 r = _mm_max_ps(a_.m128, b_.m128);
|
||||
__m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128);
|
||||
r = _mm_andnot_ps(bnan, r);
|
||||
r = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan));
|
||||
r_.m128 = r;
|
||||
#else
|
||||
r_.m128 = _mm_max_ps(a_.m128, b_.m128);
|
||||
#endif
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
|
||||
r_.v128 = wasm_f32x4_max(a_.v128, b_.v128);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if defined(simde_math_fmaxf)
|
||||
r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]);
|
||||
#else
|
||||
if (a_.values[i] > b_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (a_.values[i] < b_.values[i]) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else if (a_.values[i] == a_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else {
|
||||
r_.values[i] = b_.values[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxnmq_f32
|
||||
#define vmaxnmq_f32(a, b) simde_vmaxnmq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vmaxnmq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmaxnmq_f64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_max(a, b);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_,
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
__m128d r = _mm_max_pd(a_.m128d, b_.m128d);
|
||||
__m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d);
|
||||
r = _mm_andnot_pd(bnan, r);
|
||||
r = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan));
|
||||
r_.m128d = r;
|
||||
#else
|
||||
r_.m128d = _mm_max_pd(a_.m128d, b_.m128d);
|
||||
#endif
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
|
||||
r_.v128 = wasm_f64x2_max(a_.v128, b_.v128);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if defined(simde_math_fmax)
|
||||
r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]);
|
||||
#else
|
||||
if (a_.values[i] > b_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (a_.values[i] < b_.values[i]) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else if (a_.values[i] == a_.values[i]) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else {
|
||||
r_.values[i] = b_.values[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxnmq_f64
|
||||
#define vmaxnmq_f64(a, b) simde_vmaxnmq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MAXNM_H) */
|
||||
400
lib/simd_wrapper/simde/arm/neon/maxv.h
Normal file
400
lib/simd_wrapper/simde/arm/neon/maxv.h
Normal file
@@ -0,0 +1,400 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MAXV_H)
|
||||
#define SIMDE_ARM_NEON_MAXV_H
|
||||
|
||||
#include "types.h"
|
||||
#include <float.h>
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vmaxv_f32(simde_float32x2_t a) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_f32(a);
|
||||
#else
|
||||
simde_float32x2_private a_ = simde_float32x2_to_private(a);
|
||||
|
||||
r = -SIMDE_MATH_INFINITYF;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_f32
|
||||
#define vmaxv_f32(v) simde_vmaxv_f32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vmaxv_s8(simde_int8x8_t a) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_s8(a);
|
||||
#else
|
||||
simde_int8x8_private a_ = simde_int8x8_to_private(a);
|
||||
|
||||
r = INT8_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_s8
|
||||
#define vmaxv_s8(v) simde_vmaxv_s8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vmaxv_s16(simde_int16x4_t a) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_s16(a);
|
||||
#else
|
||||
simde_int16x4_private a_ = simde_int16x4_to_private(a);
|
||||
|
||||
r = INT16_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_s16
|
||||
#define vmaxv_s16(v) simde_vmaxv_s16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vmaxv_s32(simde_int32x2_t a) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_s32(a);
|
||||
#else
|
||||
simde_int32x2_private a_ = simde_int32x2_to_private(a);
|
||||
|
||||
r = INT32_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_s32
|
||||
#define vmaxv_s32(v) simde_vmaxv_s32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vmaxv_u8(simde_uint8x8_t a) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_u8(a);
|
||||
#else
|
||||
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_u8
|
||||
#define vmaxv_u8(v) simde_vmaxv_u8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vmaxv_u16(simde_uint16x4_t a) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_u16(a);
|
||||
#else
|
||||
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_u16
|
||||
#define vmaxv_u16(v) simde_vmaxv_u16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vmaxv_u32(simde_uint32x2_t a) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxv_u32(a);
|
||||
#else
|
||||
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxv_u32
|
||||
#define vmaxv_u32(v) simde_vmaxv_u32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vmaxvq_f32(simde_float32x4_t a) {
|
||||
simde_float32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_f32(a);
|
||||
#else
|
||||
simde_float32x4_private a_ = simde_float32x4_to_private(a);
|
||||
|
||||
r = -SIMDE_MATH_INFINITYF;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_f32
|
||||
#define vmaxvq_f32(v) simde_vmaxvq_f32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vmaxvq_f64(simde_float64x2_t a) {
|
||||
simde_float64_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_f64(a);
|
||||
#else
|
||||
simde_float64x2_private a_ = simde_float64x2_to_private(a);
|
||||
|
||||
r = -SIMDE_MATH_INFINITY;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_f64
|
||||
#define vmaxvq_f64(v) simde_vmaxvq_f64(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int8_t
|
||||
simde_vmaxvq_s8(simde_int8x16_t a) {
|
||||
int8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_s8(a);
|
||||
#else
|
||||
simde_int8x16_private a_ = simde_int8x16_to_private(a);
|
||||
|
||||
r = INT8_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_s8
|
||||
#define vmaxvq_s8(v) simde_vmaxvq_s8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int16_t
|
||||
simde_vmaxvq_s16(simde_int16x8_t a) {
|
||||
int16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_s16(a);
|
||||
#else
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
|
||||
r = INT16_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_s16
|
||||
#define vmaxvq_s16(v) simde_vmaxvq_s16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int32_t
|
||||
simde_vmaxvq_s32(simde_int32x4_t a) {
|
||||
int32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_s32(a);
|
||||
#else
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
|
||||
r = INT32_MIN;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_s32
|
||||
#define vmaxvq_s32(v) simde_vmaxvq_s32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint8_t
|
||||
simde_vmaxvq_u8(simde_uint8x16_t a) {
|
||||
uint8_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_u8(a);
|
||||
#else
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_u8
|
||||
#define vmaxvq_u8(v) simde_vmaxvq_u8(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint16_t
|
||||
simde_vmaxvq_u16(simde_uint16x8_t a) {
|
||||
uint16_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_u16(a);
|
||||
#else
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_u16
|
||||
#define vmaxvq_u16(v) simde_vmaxvq_u16(v)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
uint32_t
|
||||
simde_vmaxvq_u32(simde_uint32x4_t a) {
|
||||
uint32_t r;
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r = vmaxvq_u32(a);
|
||||
#else
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
|
||||
r = 0;
|
||||
SIMDE_VECTORIZE_REDUCTION(max:r)
|
||||
for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
|
||||
r = a_.values[i] > r ? a_.values[i] : r;
|
||||
}
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmaxvq_u32
|
||||
#define vmaxvq_u32(v) simde_vmaxvq_u32(v)
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MAXV_H) */
|
||||
681
lib/simd_wrapper/simde/arm/neon/min.h
Normal file
681
lib/simd_wrapper/simde/arm/neon/min.h
Normal file
@@ -0,0 +1,681 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MIN_H)
|
||||
#define SIMDE_ARM_NEON_MIN_H
|
||||
|
||||
#include "types.h"
|
||||
#include "cgt.h"
|
||||
#include "ceq.h"
|
||||
#include "bsl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmin_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_f32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(64)
|
||||
simde_float32x2_t r = simde_vbsl_f32(simde_vcgt_f32(b, a), a, b);
|
||||
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
r = simde_vbsl_f32(simde_vceq_f32(a, a), simde_vbsl_f32(simde_vceq_f32(b, b), r, b), a);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
if (simde_math_isnanf(a_.values[i])) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (simde_math_isnanf(b_.values[i])) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
#else
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_f32
|
||||
#define vmin_f32(a, b) simde_vmin_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmin_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmin_f64(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(64)
|
||||
simde_float64x1_t r = simde_vbsl_f64(simde_vcgt_f64(b, a), a, b);
|
||||
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
r = simde_vbsl_f64(simde_vceq_f64(a, a), simde_vbsl_f64(simde_vceq_f64(b, b), r, b), a);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
if (simde_math_isnan(a_.values[i])) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (simde_math_isnan(b_.values[i])) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
#else
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
#endif
|
||||
}
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_f64
|
||||
#define vmin_f64(a, b) simde_vmin_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vmin_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_s8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s8(simde_vcgt_s8(b, a), a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_s8
|
||||
#define vmin_s8(a, b) simde_vmin_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmin_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_s16(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s16(simde_vcgt_s16(b, a), a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_sub_pi16(a_.m64, _mm_subs_pu16(b_.m64));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_s16
|
||||
#define vmin_s16(a, b) simde_vmin_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmin_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_s32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s32(simde_vcgt_s32(b, a), a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_s32
|
||||
#define vmin_s32(a, b) simde_vmin_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_x_vmin_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_s64(simde_vcgt_s64(b, a), a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_u8(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u8(simde_vcgt_u8(b, a), a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_u8
|
||||
#define vmin_u8(a, b) simde_vmin_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_u16(a, b);
|
||||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE)
|
||||
return simde_vbsl_u16(simde_vcgt_u16(b, a), a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
|
||||
r_.m64 = _mm_sub_pi16(a_.m64, _mm_subs_pu16(a_.m64, b_.m64));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_u16
|
||||
#define vmin_u16(a, b) simde_vmin_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmin_u32(a, b);
|
||||
#elif SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u32(simde_vcgt_u32(b, a), a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmin_u32
|
||||
#define vmin_u32(a, b) simde_vmin_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_x_vmin_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if SIMDE_NATURAL_VECTOR_SIZE > 0
|
||||
return simde_vbsl_u64(simde_vcgt_u64(b, a), a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vminq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vminq_f32(a, b);
|
||||
#elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
|
||||
return vec_min(a, b);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_,
|
||||
a_ = simde_float32x4_to_private(a),
|
||||
b_ = simde_float32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS)
|
||||
r_.m128 = _mm_min_ps(a_.m128, b_.m128);
|
||||
#elif defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128 = _mm_blendv_ps(_mm_set1_ps(SIMDE_MATH_NANF), _mm_min_ps(a_.m128, b_.m128), _mm_cmpord_ps(a_.m128, b_.m128));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_min(a_.v128, b_.v128);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
#if !defined(SIMDE_FAST_NANS)
|
||||
if (simde_math_isnanf(a_.values[i])) {
|
||||
r_.values[i] = a_.values[i];
|
||||
} else if (simde_math_isnanf(b_.values[i])) {
|
||||
r_.values[i] = b_.values[i];
|
||||
} else {
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
}
|
||||
#else
|
||||
r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vminq_f32
|
||||
#define vminq_f32(a, b) simde_vminq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
/* Lane-wise minimum of two float64x2 vectors (portable NEON vminq_f64).
 * Without SIMDE_FAST_NANS, a NaN in either lane yields NaN in the result
 * lane, matching strict NaN propagation. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vminq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vminq_f64(a, b);
  #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS)
    /* vec_min is only used when the user opted out of strict NaN handling
     * (gated on SIMDE_FAST_NANS). */
    return vec_min(a, b);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.m128d = _mm_min_pd(a_.m128d, b_.m128d);
    #elif defined(SIMDE_X86_SSE4_1_NATIVE)
      /* For unordered (NaN) lanes, blend in NaN instead of whatever
       * _mm_min_pd produced, so NaNs propagate. */
      r_.m128d = _mm_blendv_pd(_mm_set1_pd(SIMDE_MATH_NAN), _mm_min_pd(a_.m128d, b_.m128d), _mm_cmpord_pd(a_.m128d, b_.m128d));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_min(a_.v128, b_.v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        #if !defined(SIMDE_FAST_NANS)
          /* Propagate NaN from either operand (a's NaN checked first). */
          if (simde_math_isnan(a_.values[i])) {
            r_.values[i] = a_.values[i];
          } else if (simde_math_isnan(b_.values[i])) {
            r_.values[i] = b_.values[i];
          } else {
            r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
          }
        #else
          r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
        #endif
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminq_f64
  #define vminq_f64(a, b) simde_vminq_f64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two int8x16 vectors (portable NEON vminq_s8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vminq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Signed 8-bit min requires SSE4.1 (_mm_min_epi8). */
      r_.m128i = _mm_min_epi8(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_s8
  #define vminq_s8(a, b) simde_vminq_s8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two int16x8 vectors (portable NEON vminq_s16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vminq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_s16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Signed 16-bit min exists as far back as SSE2. */
      r_.m128i = _mm_min_epi16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_s16
  #define vminq_s16(a, b) simde_vminq_s16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two int32x4 vectors (portable NEON vminq_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vminq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_s32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      /* Signed 32-bit min requires SSE4.1 (_mm_min_epi32). */
      r_.m128i = _mm_min_epi32(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_s32
  #define vminq_s32(a, b) simde_vminq_s32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two int64x2 vectors.  NEON has no vminq_s64, so this
 * is a SIMDe-internal helper (x_ prefix) and no native alias is defined. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vminq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* 64-bit signed min is only available with AVX-512VL. */
      r_.m128i = _mm_min_epi64(a_.m128i, b_.m128i);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
|
||||
|
||||
/* Lane-wise minimum of two uint8x16 vectors (portable NEON vminq_u8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_u8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* Unsigned 8-bit min exists as far back as SSE2. */
      r_.m128i = _mm_min_epu8(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u8x16_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_u8
  #define vminq_u8(a, b) simde_vminq_u8((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two uint16x8 vectors (portable NEON vminq_u16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.m128i = _mm_min_epu16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 lacks unsigned 16-bit min; use min(a,b) = a - sat_sub(a,b):
       * the saturating subtract is 0 when a <= b (result a) and a - b
       * when a > b (result b). */
      /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
      r_.m128i = _mm_sub_epi16(a_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u16x8_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_u16
  #define vminq_u16(a, b) simde_vminq_u16((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two uint32x4 vectors (portable NEON vminq_u32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vminq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE4_1_NATIVE)
      r_.m128i = _mm_min_epu32(a_.m128i, b_.m128i);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* SSE2 has no unsigned 32-bit min.  Emulate an unsigned saturating
       * subtract (a - b); lanes where it is zero satisfy a <= b.  Turn
       * that into an all-ones/all-zeros mask m and blend: r = m ? a : b. */
      const __m128i i32_min = _mm_set1_epi32(INT32_MIN);
      const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i);
      __m128i m =
        _mm_cmpeq_epi32(
          /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */
          _mm_and_si128(
            difference,
            /* Unsigned compare via sign-bit flip: (diff ^ MIN) > (a ^ MIN)
             * detects a borrow (a < b); XOR with all-ones inverts it so we
             * keep the difference only when no borrow occurred. */
            _mm_xor_si128(
              _mm_cmpgt_epi32(
                _mm_xor_si128(difference, i32_min),
                _mm_xor_si128(a_.m128i, i32_min)
              ),
              _mm_set1_epi32(~INT32_C(0))
            )
          ),
          _mm_setzero_si128()
        );
      /* Select a where m is set (a <= b), b elsewhere. */
      r_.m128i =
        _mm_or_si128(
          _mm_and_si128(m, a_.m128i),
          _mm_andnot_si128(m, b_.m128i)
        );
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_u32x4_min(a_.v128, b_.v128);
    #else
      /* Portable scalar fallback. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminq_u32
  #define vminq_u32(a, b) simde_vminq_u32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise minimum of two uint64x2 vectors.  NEON has no vminq_u64, so
 * this is a SIMDe-internal helper (x_ prefix) and no native alias is
 * defined. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vminq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    return vec_min(a, b);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    /* Scalar fallback only; no SSE/WASM branch here. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i];
    }

    return simde_uint64x2_from_private(r_);
  #endif
}
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MIN_H) */
|
||||
219
lib/simd_wrapper/simde/arm/neon/minnm.h
Normal file
219
lib/simd_wrapper/simde/arm/neon/minnm.h
Normal file
@@ -0,0 +1,219 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MINNM_H)
|
||||
#define SIMDE_ARM_NEON_MINNM_H
|
||||
|
||||
#include "types.h"
|
||||
#include "cle.h"
|
||||
#include "bsl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Lane-wise "min number" of two float32x2 vectors (portable NEON
 * vminnm_f32): when exactly one operand of a lane is NaN, the numeric
 * operand is returned, matching fminf() semantics. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vminnm_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
    return vminnm_f32(a, b);
  #else
    simde_float32x2_private
      r_,
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      #if defined(simde_math_fminf)
        r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]);
      #else
        /* Open-coded fminf: ordered comparisons first, then prefer the
         * non-NaN operand (x == x is false only for NaN). */
        if (a_.values[i] < b_.values[i]) {
          r_.values[i] = a_.values[i];
        } else if (a_.values[i] > b_.values[i]) {
          r_.values[i] = b_.values[i];
        } else if (a_.values[i] == a_.values[i]) {
          r_.values[i] = a_.values[i];
        } else {
          r_.values[i] = b_.values[i];
        }
      #endif
    }

    return simde_float32x2_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminnm_f32
  #define vminnm_f32(a, b) simde_vminnm_f32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise "min number" of two float64x1 vectors (portable NEON
 * vminnm_f64): when exactly one operand of a lane is NaN, the numeric
 * operand is returned, matching fmin() semantics. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vminnm_f64(simde_float64x1_t a, simde_float64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vminnm_f64(a, b);
  #else
    simde_float64x1_private
      r_,
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      #if defined(simde_math_fmin)
        r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]);
      #else
        /* Open-coded fmin: ordered comparisons first, then prefer the
         * non-NaN operand (x == x is false only for NaN). */
        if (a_.values[i] < b_.values[i]) {
          r_.values[i] = a_.values[i];
        } else if (a_.values[i] > b_.values[i]) {
          r_.values[i] = b_.values[i];
        } else if (a_.values[i] == a_.values[i]) {
          r_.values[i] = a_.values[i];
        } else {
          r_.values[i] = b_.values[i];
        }
      #endif
    }

    return simde_float64x1_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminnm_f64
  #define vminnm_f64(a, b) simde_vminnm_f64((a), (b))
#endif
|
||||
|
||||
/* Lane-wise "min number" of two float32x4 vectors (portable NEON
 * vminnmq_f32): when exactly one operand of a lane is NaN, the numeric
 * operand is returned, matching fminf() semantics. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vminnmq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6)
    return vminnmq_f32(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS)
    /* With fast-NaN semantics, a simple compare-and-select suffices. */
    return simde_vbslq_f32(simde_vcleq_f32(a, b), a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_min(a, b);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);

    #if defined(SIMDE_X86_SSE_NATIVE)
      #if !defined(SIMDE_FAST_NANS)
        /* _mm_min_ps yields b for unordered lanes; patch lanes where b is
         * NaN to take a instead, giving fminf-style NaN handling. */
        __m128 r = _mm_min_ps(a_.m128, b_.m128);
        __m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128);
        r = _mm_andnot_ps(bnan, r);
        r_.m128 = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan));
      #else
        r_.m128 = _mm_min_ps(a_.m128, b_.m128);
      #endif
    #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.v128 = wasm_f32x4_min(a_.v128, b_.v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        #if defined(simde_math_fminf)
          r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]);
        #else
          /* Open-coded fminf: ordered comparisons first, then prefer the
           * non-NaN operand (x == x is false only for NaN). */
          if (a_.values[i] < b_.values[i]) {
            r_.values[i] = a_.values[i];
          } else if (a_.values[i] > b_.values[i]) {
            r_.values[i] = b_.values[i];
          } else if (a_.values[i] == a_.values[i]) {
            r_.values[i] = a_.values[i];
          } else {
            r_.values[i] = b_.values[i];
          }
        #endif
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vminnmq_f32
  #define vminnmq_f32(a, b) simde_vminnmq_f32((a), (b))
#endif
|
||||
|
||||
/* Lane-wise "min number" of two float64x2 vectors (portable NEON
 * vminnmq_f64): when exactly one operand of a lane is NaN, the numeric
 * operand is returned, matching fmin() semantics. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vminnmq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vminnmq_f64(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS)
    /* With fast-NaN semantics, a simple compare-and-select suffices. */
    return simde_vbslq_f64(simde_vcleq_f64(a, b), a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_min(a, b);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      #if !defined(SIMDE_FAST_NANS)
        /* _mm_min_pd yields b for unordered lanes; patch lanes where b is
         * NaN to take a instead, giving fmin-style NaN handling. */
        __m128d r = _mm_min_pd(a_.m128d, b_.m128d);
        __m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d);
        r = _mm_andnot_pd(bnan, r);
        r_.m128d = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan));
      #else
        r_.m128d = _mm_min_pd(a_.m128d, b_.m128d);
      #endif
    #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.v128 = wasm_f64x2_min(a_.v128, b_.v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        #if defined(simde_math_fmin)
          r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]);
        #else
          /* Open-coded fmin: ordered comparisons first, then prefer the
           * non-NaN operand (x == x is false only for NaN). */
          if (a_.values[i] < b_.values[i]) {
            r_.values[i] = a_.values[i];
          } else if (a_.values[i] > b_.values[i]) {
            r_.values[i] = b_.values[i];
          } else if (a_.values[i] == a_.values[i]) {
            r_.values[i] = a_.values[i];
          } else {
            r_.values[i] = b_.values[i];
          }
        #endif
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminnmq_f64
  #define vminnmq_f64(a, b) simde_vminnmq_f64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MINNM_H) */
|
||||
424
lib/simd_wrapper/simde/arm/neon/minv.h
Normal file
424
lib/simd_wrapper/simde/arm/neon/minv.h
Normal file
@@ -0,0 +1,424 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MINV_H)
|
||||
#define SIMDE_ARM_NEON_MINV_H
|
||||
|
||||
#include "types.h"
|
||||
#include <float.h>
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Horizontal minimum across both lanes of a float32x2 vector (portable
 * NEON vminv_f32).  Without SIMDE_FAST_NANS, a NaN lane makes the result
 * NaN (and it stays NaN for the remaining lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vminv_f32(simde_float32x2_t a) {
  simde_float32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_f32(a);
  #else
    simde_float32x2_private a_ = simde_float32x2_to_private(a);

    /* +inf is the identity element for min. */
    r = SIMDE_MATH_INFINITYF;
    #if defined(SIMDE_FAST_NANS)
      SIMDE_VECTORIZE_REDUCTION(min:r)
    #else
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      #if defined(SIMDE_FAST_NANS)
        r = a_.values[i] < r ? a_.values[i] : r;
      #else
        /* Keep the smaller value; a NaN lane fails both comparisons and
         * self-equality and therefore becomes (and remains) the result. */
        r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
      #endif
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_f32
  #define vminv_f32(v) simde_vminv_f32(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of an int8x8 vector (portable
 * NEON vminv_s8). */
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vminv_s8(simde_int8x8_t a) {
  int8_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_s8(a);
  #else
    simde_int8x8_private a_ = simde_int8x8_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT8_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_s8
  #define vminv_s8(v) simde_vminv_s8(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of an int16x4 vector (portable
 * NEON vminv_s16). */
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vminv_s16(simde_int16x4_t a) {
  int16_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_s16(a);
  #else
    simde_int16x4_private a_ = simde_int16x4_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT16_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_s16
  #define vminv_s16(v) simde_vminv_s16(v)
#endif
|
||||
|
||||
/* Horizontal minimum across both lanes of an int32x2 vector (portable
 * NEON vminv_s32). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vminv_s32(simde_int32x2_t a) {
  int32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_s32(a);
  #else
    simde_int32x2_private a_ = simde_int32x2_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT32_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_s32
  #define vminv_s32(v) simde_vminv_s32(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a uint8x8 vector (portable
 * NEON vminv_u8). */
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vminv_u8(simde_uint8x8_t a) {
  uint8_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_u8(a);
  #else
    simde_uint8x8_private a_ = simde_uint8x8_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT8_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_u8
  #define vminv_u8(v) simde_vminv_u8(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a uint16x4 vector (portable
 * NEON vminv_u16). */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vminv_u16(simde_uint16x4_t a) {
  uint16_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_u16(a);
  #else
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT16_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_u16
  #define vminv_u16(v) simde_vminv_u16(v)
#endif
|
||||
|
||||
/* Horizontal minimum across both lanes of a uint32x2 vector (portable
 * NEON vminv_u32). */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vminv_u32(simde_uint32x2_t a) {
  uint32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminv_u32(a);
  #else
    simde_uint32x2_private a_ = simde_uint32x2_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT32_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminv_u32
  #define vminv_u32(v) simde_vminv_u32(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a float32x4 vector (portable
 * NEON vminvq_f32).  Without SIMDE_FAST_NANS, a NaN lane makes the result
 * NaN (and it stays NaN for the remaining lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vminvq_f32(simde_float32x4_t a) {
  simde_float32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_f32(a);
  #else
    simde_float32x4_private a_ = simde_float32x4_to_private(a);

    /* +inf is the identity element for min. */
    r = SIMDE_MATH_INFINITYF;
    #if defined(SIMDE_FAST_NANS)
      SIMDE_VECTORIZE_REDUCTION(min:r)
    #else
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      #if defined(SIMDE_FAST_NANS)
        r = a_.values[i] < r ? a_.values[i] : r;
      #else
        /* Keep the smaller value; a NaN lane fails both comparisons and
         * self-equality and therefore becomes (and remains) the result. */
        r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
      #endif
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_f32
  #define vminvq_f32(v) simde_vminvq_f32(v)
#endif
|
||||
|
||||
/* Horizontal minimum across both lanes of a float64x2 vector (portable
 * NEON vminvq_f64).  Without SIMDE_FAST_NANS, a NaN lane makes the result
 * NaN (and it stays NaN for the remaining lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vminvq_f64(simde_float64x2_t a) {
  simde_float64_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_f64(a);
  #else
    simde_float64x2_private a_ = simde_float64x2_to_private(a);

    /* +inf is the identity element for min. */
    r = SIMDE_MATH_INFINITY;
    #if defined(SIMDE_FAST_NANS)
      SIMDE_VECTORIZE_REDUCTION(min:r)
    #else
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      #if defined(SIMDE_FAST_NANS)
        r = a_.values[i] < r ? a_.values[i] : r;
      #else
        /* Keep the smaller value; a NaN lane fails both comparisons and
         * self-equality and therefore becomes (and remains) the result. */
        r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i]));
      #endif
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_f64
  #define vminvq_f64(v) simde_vminvq_f64(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of an int8x16 vector (portable
 * NEON vminvq_s8). */
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_vminvq_s8(simde_int8x16_t a) {
  int8_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_s8(a);
  #else
    simde_int8x16_private a_ = simde_int8x16_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT8_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_s8
  #define vminvq_s8(v) simde_vminvq_s8(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of an int16x8 vector (portable
 * NEON vminvq_s16). */
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vminvq_s16(simde_int16x8_t a) {
  int16_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_s16(a);
  #else
    simde_int16x8_private a_ = simde_int16x8_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT16_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_s16
  #define vminvq_s16(v) simde_vminvq_s16(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of an int32x4 vector (portable
 * NEON vminvq_s32). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vminvq_s32(simde_int32x4_t a) {
  int32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_s32(a);
  #else
    simde_int32x4_private a_ = simde_int32x4_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = INT32_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_s32
  #define vminvq_s32(v) simde_vminvq_s32(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a uint8x16 vector (portable
 * NEON vminvq_u8). */
SIMDE_FUNCTION_ATTRIBUTES
uint8_t
simde_vminvq_u8(simde_uint8x16_t a) {
  uint8_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_u8(a);
  #else
    simde_uint8x16_private a_ = simde_uint8x16_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT8_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_u8
  #define vminvq_u8(v) simde_vminvq_u8(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a uint16x8 vector (portable
 * NEON vminvq_u16). */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vminvq_u16(simde_uint16x8_t a) {
  uint16_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_u16(a);
  #else
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT16_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_u16
  #define vminvq_u16(v) simde_vminvq_u16(v)
#endif
|
||||
|
||||
/* Horizontal minimum across all lanes of a uint32x4 vector (portable
 * NEON vminvq_u32). */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vminvq_u32(simde_uint32x4_t a) {
  uint32_t r;

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r = vminvq_u32(a);
  #else
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);

    /* Start at the type maximum so every lane can only lower r. */
    r = UINT32_MAX;
    SIMDE_VECTORIZE_REDUCTION(min:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r = a_.values[i] < r ? a_.values[i] : r;
    }
  #endif

  return r;
}
/* Optionally shadow the native intrinsic name with the portable version. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vminvq_u32
  #define vminvq_u32(v) simde_vminvq_u32(v)
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MINV_H) */
|
||||
296
lib/simd_wrapper/simde/arm/neon/mla.h
Normal file
296
lib/simd_wrapper/simde/arm/neon/mla.h
Normal file
@@ -0,0 +1,296 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLA_H)
|
||||
#define SIMDE_ARM_NEON_MLA_H
|
||||
|
||||
#include "types.h"
|
||||
#include "add.h"
|
||||
#include "mul.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_f32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_f32(simde_vmul_f32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_f32
|
||||
#define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmla_f64(a, b, c);
|
||||
#else
|
||||
return simde_vadd_f64(simde_vmul_f64(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_f64
|
||||
#define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_s8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s8(simde_vmul_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_s8
|
||||
#define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_s16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s16(simde_vmul_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_s16
|
||||
#define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_s32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s32(simde_vmul_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_s32
|
||||
#define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_u8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u8(simde_vmul_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_u8
|
||||
#define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_u16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u16(simde_vmul_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_u16
|
||||
#define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmla_u32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u32(simde_vmul_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmla_u32
|
||||
#define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
  /* Lane-wise multiply-accumulate: a + (b * c) on four float32 lanes. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_f32(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_madd(b, c, a);
  #elif \
      defined(SIMDE_X86_FMA_NATIVE)
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b),
      c_ = simde_float32x4_to_private(c);

    /* x86 fused multiply-add: computes (b * c) + a per lane.
     * NOTE(review): FMA fuses the operation into one rounding step, which
     * can differ in the last bit from the separate mul+add fallback —
     * presumably acceptable upstream; confirm if bit-exactness matters. */
    #if defined(SIMDE_X86_FMA_NATIVE)
      r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128);
    #endif

    return simde_float32x4_from_private(r_);
  #else
    /* Generic fallback built from the SIMDe add/mul polyfills. */
    return simde_vaddq_f32(simde_vmulq_f32(b, c), a);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_f32
  #define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
  /* Lane-wise multiply-accumulate: a + (b * c) on two float64 lanes. */
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlaq_f64(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_madd(b, c, a);
  #elif \
      defined(SIMDE_X86_FMA_NATIVE)
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b),
      c_ = simde_float64x2_to_private(c);

    /* x86 fused multiply-add: computes (b * c) + a per lane.
     * NOTE(review): single-rounding FMA may differ in the last bit from
     * the separate mul+add fallback below. */
    #if defined(SIMDE_X86_FMA_NATIVE)
      r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d);
    #endif

    return simde_float64x2_from_private(r_);
  #else
    /* Generic fallback built from the SIMDe add/mul polyfills. */
    return simde_vaddq_f64(simde_vmulq_f64(b, c), a);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_f64
  #define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vmlaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_s8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s8(simde_vmulq_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_s8
|
||||
#define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_s16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s16(simde_vmulq_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_s16
|
||||
#define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_s32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s32(simde_vmulq_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_s32
|
||||
#define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_u8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u8(simde_vmulq_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_u8
|
||||
#define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_u16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u16(simde_vmulq_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_u16
|
||||
#define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlaq_u32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u32(simde_vmulq_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlaq_u32
|
||||
#define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLA_H) */
|
||||
333
lib/simd_wrapper/simde/arm/neon/mla_n.h
Normal file
333
lib/simd_wrapper/simde/arm/neon/mla_n.h
Normal file
@@ -0,0 +1,333 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLA_N_H)
|
||||
#define SIMDE_ARM_NEON_MLA_N_H
|
||||
|
||||
#include "types.h"
|
||||
#include "add.h"
|
||||
#include "mul.h"
|
||||
#include "mul_n.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmla_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmla_n_f32(a, b, c);
  #else
    simde_float32x2_private
      r_,
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);

    /* Use the GCC vector-extension expression when available and not
     * affected by the compiler bug the macro name refers to; otherwise
     * fall back to the explicit per-lane loop. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_float32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmla_n_f32
  #define vmla_n_f32(a, b, c) simde_vmla_n_f32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmla_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmla_n_s16(a, b, c);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);

    /* Vector expression guarded by two known-GCC-bug workaround macros. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmla_n_s16
  #define vmla_n_s16(a, b, c) simde_vmla_n_s16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmla_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmla_n_s32(a, b, c);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmla_n_s32
  #define vmla_n_s32(a, b, c) simde_vmla_n_s32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmla_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmla_n_u16(a, b, c);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmla_n_u16
  #define vmla_n_u16(a, b, c) simde_vmla_n_u16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmla_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmla_n_u32(a, b, c);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmla_n_u32
  #define vmla_n_u32(a, b, c) simde_vmla_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_n_f32(a, b, c);
  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
    /* For targets whose natural vector width is <= 128 bits, reuse the
     * existing mul_n/add polyfills. */
    return simde_vaddq_f32(simde_vmulq_n_f32(b, c), a);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);

    /* Vector expression unless the GCC bug named in the macro applies. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_n_f32
  #define vmlaq_n_f32(a, b, c) simde_vmlaq_n_f32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlaq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_n_s16(a, b, c);
  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
    return simde_vaddq_s16(simde_vmulq_n_s16(b, c), a);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_n_s16
  #define vmlaq_n_s16(a, b, c) simde_vmlaq_n_s16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlaq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_n_s32(a, b, c);
  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
    return simde_vaddq_s32(simde_vmulq_n_s32(b, c), a);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_n_s32
  #define vmlaq_n_s32(a, b, c) simde_vmlaq_n_s32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlaq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_n_u16(a, b, c);
  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
    return simde_vaddq_u16(simde_vmulq_n_u16(b, c), a);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_n_u16
  #define vmlaq_n_u16(a, b, c) simde_vmlaq_n_u16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlaq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) {
  /* Multiply-accumulate by scalar: a + (b * c), applied to each lane. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlaq_n_u32(a, b, c);
  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
    return simde_vaddq_u32(simde_vmulq_n_u32(b, c), a);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      r_.values = (b_.values * c) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c) + a_.values[i];
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlaq_n_u32
  #define vmlaq_n_u32(a, b, c) simde_vmlaq_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLA_N_H) */
|
||||
156
lib/simd_wrapper/simde/arm/neon/mlal.h
Normal file
156
lib/simd_wrapper/simde/arm/neon/mlal.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLAL_H)
|
||||
#define SIMDE_ARM_NEON_MLAL_H
|
||||
|
||||
#include "movl.h"
|
||||
#include "mla.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vmlal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlal_s8(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_s16(a, simde_vmovl_s8(b), simde_vmovl_s8(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_s8
|
||||
#define vmlal_s8(a, b, c) simde_vmlal_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlal_s16(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vmovl_s16(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_s16
|
||||
#define vmlal_s16(a, b, c) simde_vmlal_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
  /* Widening multiply-accumulate: a + (int64)b * (int64)c per lane.
   * Done via private structs because there is no 64-bit vmlaq helper. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_s32(a, b, c);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(simde_vmovl_s32(b)),
      c_ = simde_int64x2_to_private(simde_vmovl_s32(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_s32
  #define vmlal_s32(a, b, c) simde_vmlal_s32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmlal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlal_u8(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_u16(a, simde_vmovl_u8(b), simde_vmovl_u8(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_u8
|
||||
#define vmlal_u8(a, b, c) simde_vmlal_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmlal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmlal_u16(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vmovl_u16(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_u16
|
||||
#define vmlal_u16(a, b, c) simde_vmlal_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
  /* Widening multiply-accumulate: a + (uint64)b * (uint64)c per lane.
   * Done via private structs because there is no 64-bit vmlaq helper. */
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_u32(a, b, c);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)),
      c_ = simde_uint64x2_to_private(simde_vmovl_u32(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_u32
  #define vmlal_u32(a, b, c) simde_vmlal_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLAL_H) */
|
||||
156
lib/simd_wrapper/simde/arm/neon/mlal_high.h
Normal file
156
lib/simd_wrapper/simde/arm/neon/mlal_high.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_MLAL_HIGH_H
|
||||
|
||||
#include "movl_high.h"
|
||||
#include "mla.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vmlal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlal_high_s8(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_s16(a, simde_vmovl_high_s8(b), simde_vmovl_high_s8(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_high_s8
|
||||
#define vmlal_high_s8(a, b, c) simde_vmlal_high_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlal_high_s16(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vmovl_high_s16(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_high_s16
|
||||
#define vmlal_high_s16(a, b, c) simde_vmlal_high_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
  /* Widening multiply-accumulate over the HIGH halves of b and c:
   * a + (int64)b_hi * (int64)c_hi per lane. */
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlal_high_s32(a, b, c);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)),
      c_ = simde_int64x2_to_private(simde_vmovl_high_s32(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_high_s32
  #define vmlal_high_s32(a, b, c) simde_vmlal_high_s32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmlal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlal_high_u8(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_u16(a, simde_vmovl_high_u8(b), simde_vmovl_high_u8(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_high_u8
|
||||
#define vmlal_high_u8(a, b, c) simde_vmlal_high_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmlal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlal_high_u16(a, b, c);
|
||||
#else
|
||||
return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vmovl_high_u16(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_high_u16
|
||||
#define vmlal_high_u16(a, b, c) simde_vmlal_high_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vmlal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlal_high_u32(a, b, c);
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
r_,
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)),
|
||||
c_ = simde_uint64x2_to_private(simde_vmovl_high_u32(c));
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = (b_.values * c_.values) + a_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlal_high_u32
|
||||
#define vmlal_high_u32(a, b, c) simde_vmlal_high_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) */
|
||||
128
lib/simd_wrapper/simde/arm/neon/mlal_high_n.h
Normal file
128
lib/simd_wrapper/simde/arm/neon/mlal_high_n.h
Normal file
@@ -0,0 +1,128 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Décio Luiz Gazzoni Filho <decio@decpp.net>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H)
|
||||
#define SIMDE_ARM_NEON_MLAL_HIGH_N_H
|
||||
|
||||
#include "movl_high.h"
|
||||
#include "dup_n.h"
|
||||
#include "mla.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Widening multiply-accumulate of the HIGH halves of a vector by a scalar:
 * r = a + widen(hi(b)) * c.  The scalar c is broadcast (dup) at the wide
 * element width; the 64-bit-result variants compute the lanes directly on
 * the private representation because no 64-bit mla exists. */

/* high int16x8 halves * scalar -> int32x4 accumulate */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlal_high_n_s16(a, b, c);
  #else
    return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_high_n_s16
  #define vmlal_high_n_s16(a, b, c) simde_vmlal_high_n_s16((a), (b), (c))
#endif

/* high int32x4 halves * scalar -> int64x2 accumulate (per-lane) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlal_high_n_s32(a, b, c);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)),
      c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_high_n_s32
  #define vmlal_high_n_s32(a, b, c) simde_vmlal_high_n_s32((a), (b), (c))
#endif

/* high uint16x8 halves * scalar -> uint32x4 accumulate */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlal_high_n_u16(a, b, c);
  #else
    return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_high_n_u16
  #define vmlal_high_n_u16(a, b, c) simde_vmlal_high_n_u16((a), (b), (c))
#endif

/* high uint32x4 halves * scalar -> uint64x2 accumulate (per-lane) */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlal_high_n_u32(a, b, c);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)),
      c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_high_n_u32
  #define vmlal_high_n_u32(a, b, c) simde_vmlal_high_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) */
|
||||
120
lib/simd_wrapper/simde/arm/neon/mlal_lane.h
Normal file
120
lib/simd_wrapper/simde/arm/neon/mlal_lane.h
Normal file
@@ -0,0 +1,120 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLAL_LANE_H)
|
||||
#define SIMDE_ARM_NEON_MLAL_LANE_H
|
||||
|
||||
#include "mlal.h"
|
||||
#include "dup_lane.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlal_lane / vmlal_laneq: widening multiply-accumulate by one lane of a
 * vector.  Each group maps to the native intrinsic when available and
 * otherwise to vmlal with the selected lane broadcast via dup_lane /
 * dup_laneq.  Implemented as macros (not functions) because `lane` must be
 * a compile-time constant for the native intrinsics.  The _lane_ forms are
 * ARMv7 NEON; the _laneq_ forms (Q-register lane source) are AArch64-only,
 * hence the A64V8 guards. */

/* int16 lane -> int32 accumulate */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlal_lane_s16(a, b, v, lane) vmlal_lane_s16((a), (b), (v), (lane))
#else
  #define simde_vmlal_lane_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_lane_s16
  #define vmlal_lane_s16(a, b, c, lane) simde_vmlal_lane_s16((a), (b), (c), (lane))
#endif

/* int32 lane -> int64 accumulate */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlal_lane_s32(a, b, v, lane) vmlal_lane_s32((a), (b), (v), (lane))
#else
  #define simde_vmlal_lane_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_lane_s32
  #define vmlal_lane_s32(a, b, c, lane) simde_vmlal_lane_s32((a), (b), (c), (lane))
#endif

/* uint16 lane -> uint32 accumulate */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlal_lane_u16(a, b, v, lane) vmlal_lane_u16((a), (b), (v), (lane))
#else
  #define simde_vmlal_lane_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_lane_u16
  #define vmlal_lane_u16(a, b, c, lane) simde_vmlal_lane_u16((a), (b), (c), (lane))
#endif

/* uint32 lane -> uint64 accumulate */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlal_lane_u32(a, b, v, lane) vmlal_lane_u32((a), (b), (v), (lane))
#else
  #define simde_vmlal_lane_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_lane_u32
  #define vmlal_lane_u32(a, b, c, lane) simde_vmlal_lane_u32((a), (b), (c), (lane))
#endif

/* laneq variants: lane taken from a 128-bit vector (AArch64 only) */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlal_laneq_s16(a, b, v, lane) vmlal_laneq_s16((a), (b), (v), (lane))
#else
  #define simde_vmlal_laneq_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_laneq_s16
  #define vmlal_laneq_s16(a, b, c, lane) simde_vmlal_laneq_s16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlal_laneq_s32(a, b, v, lane) vmlal_laneq_s32((a), (b), (v), (lane))
#else
  #define simde_vmlal_laneq_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_laneq_s32
  #define vmlal_laneq_s32(a, b, c, lane) simde_vmlal_laneq_s32((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlal_laneq_u16(a, b, v, lane) vmlal_laneq_u16((a), (b), (v), (lane))
#else
  #define simde_vmlal_laneq_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_laneq_u16
  #define vmlal_laneq_u16(a, b, c, lane) simde_vmlal_laneq_u16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlal_laneq_u32(a, b, v, lane) vmlal_laneq_u32((a), (b), (v), (lane))
#else
  #define simde_vmlal_laneq_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlal_laneq_u32
  #define vmlal_laneq_u32(a, b, c, lane) simde_vmlal_laneq_u32((a), (b), (c), (lane))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLAL_LANE_H) */
|
||||
128
lib/simd_wrapper/simde/arm/neon/mlal_n.h
Normal file
128
lib/simd_wrapper/simde/arm/neon/mlal_n.h
Normal file
@@ -0,0 +1,128 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLAL_N_H)
|
||||
#define SIMDE_ARM_NEON_MLAL_N_H
|
||||
|
||||
#include "movl.h"
|
||||
#include "dup_n.h"
|
||||
#include "mla.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlal_n: widening multiply-accumulate of a 64-bit vector by a scalar:
 * r = a + widen(b) * c.  The scalar is broadcast at the wide width; the
 * ARMv7 native intrinsic is used when available.  64-bit-result variants
 * are computed per lane on the private representation (no 64-bit mla). */

/* int16x4 * scalar -> int32x4 accumulate */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_n_s16(a, b, c);
  #else
    return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_n_s16
  #define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c))
#endif

/* int32x2 * scalar -> int64x2 accumulate (per-lane) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_n_s32(a, b, c);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(simde_vmovl_s32(b)),
      c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_n_s32
  #define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c))
#endif

/* uint16x4 * scalar -> uint32x4 accumulate */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_n_u16(a, b, c);
  #else
    return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_n_u16
  #define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c))
#endif

/* uint32x2 * scalar -> uint64x2 accumulate (per-lane) */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlal_n_u32(a, b, c);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)),
      c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = (b_.values * c_.values) + a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlal_n_u32
  #define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */
|
||||
290
lib/simd_wrapper/simde/arm/neon/mls.h
Normal file
290
lib/simd_wrapper/simde/arm/neon/mls.h
Normal file
@@ -0,0 +1,290 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLS_H)
|
||||
#define SIMDE_ARM_NEON_MLS_H
|
||||
|
||||
#include "mul.h"
|
||||
#include "sub.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmls_f32: multiply-subtract, r = a - (b * c), on float32x2.
 * Falls back to sub + mul when the ARMv7 native intrinsic is absent. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmls_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_f32(a, b, c);
  #else
    return simde_vsub_f32(a, simde_vmul_f32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_f32
  #define vmls_f32(a, b, c) simde_vmls_f32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmls_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmls_f64(a, b, c);
|
||||
#else
|
||||
return simde_vsub_f64(a, simde_vmul_f64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmls_f64
|
||||
#define vmls_f64(a, b, c) simde_vmls_f64((a), (b), (c))
|
||||
#endif
|
||||
|
||||
/* vmls on 64-bit integer vectors: r = a - (b * c), elementwise.
 * Each variant uses the ARMv7 native intrinsic when available and
 * otherwise composes sub + mul at the same element width. */

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmls_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_s8(a, b, c);
  #else
    return simde_vsub_s8(a, simde_vmul_s8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_s8
  #define vmls_s8(a, b, c) simde_vmls_s8((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmls_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_s16(a, b, c);
  #else
    return simde_vsub_s16(a, simde_vmul_s16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_s16
  #define vmls_s16(a, b, c) simde_vmls_s16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmls_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_s32(a, b, c);
  #else
    return simde_vsub_s32(a, simde_vmul_s32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_s32
  #define vmls_s32(a, b, c) simde_vmls_s32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmls_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_u8(a, b, c);
  #else
    return simde_vsub_u8(a, simde_vmul_u8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_u8
  #define vmls_u8(a, b, c) simde_vmls_u8((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmls_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_u16(a, b, c);
  #else
    return simde_vsub_u16(a, simde_vmul_u16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_u16
  #define vmls_u16(a, b, c) simde_vmls_u16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmls_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_u32(a, b, c);
  #else
    return simde_vsub_u32(a, simde_vmul_u32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_u32
  #define vmls_u32(a, b, c) simde_vmls_u32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_f32: multiply-subtract, r = a - (b * c), on float32x4.
 * On x86 with FMA the whole operation maps to a single fused
 * _mm_fnmadd_ps (-(b*c) + a); note the fused form can differ from the
 * sub+mul fallback in the last ulp, matching native NEON behavior. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_f32(a, b, c);
  #elif \
      defined(SIMDE_X86_FMA_NATIVE)
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b),
      c_ = simde_float32x4_to_private(c);

    #if defined(SIMDE_X86_FMA_NATIVE)
      r_.m128 = _mm_fnmadd_ps(b_.m128, c_.m128, a_.m128);
    #endif

    return simde_float32x4_from_private(r_);
  #else
    return simde_vsubq_f32(a, simde_vmulq_f32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_f32
  #define vmlsq_f32(a, b, c) simde_vmlsq_f32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vmlsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmlsq_f64(a, b, c);
|
||||
#elif \
|
||||
defined(SIMDE_X86_FMA_NATIVE)
|
||||
simde_float64x2_private
|
||||
r_,
|
||||
a_ = simde_float64x2_to_private(a),
|
||||
b_ = simde_float64x2_to_private(b),
|
||||
c_ = simde_float64x2_to_private(c);
|
||||
|
||||
#if defined(SIMDE_X86_FMA_NATIVE)
|
||||
r_.m128d = _mm_fnmadd_pd(b_.m128d, c_.m128d, a_.m128d);
|
||||
#endif
|
||||
|
||||
return simde_float64x2_from_private(r_);
|
||||
#else
|
||||
return simde_vsubq_f64(a, simde_vmulq_f64(b, c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmlsq_f64
|
||||
#define vmlsq_f64(a, b, c) simde_vmlsq_f64((a), (b), (c))
|
||||
#endif
|
||||
|
||||
/* vmlsq on 128-bit integer vectors: r = a - (b * c), elementwise.
 * Each variant uses the ARMv7 native intrinsic when available and
 * otherwise composes subq + mulq at the same element width. */

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmlsq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_s8(a, b, c);
  #else
    return simde_vsubq_s8(a, simde_vmulq_s8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_s8
  #define vmlsq_s8(a, b, c) simde_vmlsq_s8((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_s16(a, b, c);
  #else
    return simde_vsubq_s16(a, simde_vmulq_s16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_s16
  #define vmlsq_s16(a, b, c) simde_vmlsq_s16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_s32(a, b, c);
  #else
    return simde_vsubq_s32(a, simde_vmulq_s32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_s32
  #define vmlsq_s32(a, b, c) simde_vmlsq_s32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmlsq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_u8(a, b, c);
  #else
    return simde_vsubq_u8(a, simde_vmulq_u8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_u8
  #define vmlsq_u8(a, b, c) simde_vmlsq_u8((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_u16(a, b, c);
  #else
    return simde_vsubq_u16(a, simde_vmulq_u16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_u16
  #define vmlsq_u16(a, b, c) simde_vmlsq_u16((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_u32(a, b, c);
  #else
    return simde_vsubq_u32(a, simde_vmulq_u32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_u32
  #define vmlsq_u32(a, b, c) simde_vmlsq_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLS_H) */
|
||||
181
lib/simd_wrapper/simde/arm/neon/mls_n.h
Normal file
181
lib/simd_wrapper/simde/arm/neon/mls_n.h
Normal file
@@ -0,0 +1,181 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLS_N_H)
|
||||
#define SIMDE_ARM_NEON_MLS_N_H
|
||||
|
||||
#include "sub.h"
|
||||
#include "dup_n.h"
|
||||
#include "mls.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmls_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmls_n_f32(a, b, c);
|
||||
#else
|
||||
return simde_vmls_f32(a, b, simde_vdup_n_f32(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmls_n_f32
|
||||
#define vmls_n_f32(a, b, c) simde_vmls_n_f32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmls_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmls_n_s16(a, b, c);
|
||||
#else
|
||||
return simde_vmls_s16(a, b, simde_vdup_n_s16(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmls_n_s16
|
||||
#define vmls_n_s16(a, b, c) simde_vmls_n_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmls_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmls_n_s32(a, b, c);
|
||||
#else
|
||||
return simde_vmls_s32(a, b, simde_vdup_n_s32(c));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmls_n_s32
|
||||
#define vmls_n_s32(a, b, c) simde_vmls_n_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
/* vmls_n_u16: lane-wise a - (b * c) with scalar c, four uint16 lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmls_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_n_u16(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmls_u16(a, b, simde_vdup_n_u16(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_n_u16
  #define vmls_n_u16(a, b, c) simde_vmls_n_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmls_n_u32: lane-wise a - (b * c) with scalar c, two uint32 lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmls_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmls_n_u32(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmls_u32(a, b, simde_vdup_n_u32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmls_n_u32
  #define vmls_n_u32(a, b, c) simde_vmls_n_u32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_n_f32: 128-bit variant — a - (b * c) with scalar c, four float32
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmlsq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_n_f32(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmlsq_f32(a, b, simde_vdupq_n_f32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_n_f32
  #define vmlsq_n_f32(a, b, c) simde_vmlsq_n_f32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_n_s16: 128-bit variant — a - (b * c) with scalar c, eight int16
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_n_s16(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmlsq_s16(a, b, simde_vdupq_n_s16(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_n_s16
  #define vmlsq_n_s16(a, b, c) simde_vmlsq_n_s16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_n_s32: 128-bit variant — a - (b * c) with scalar c, four int32
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_n_s32(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmlsq_s32(a, b, simde_vdupq_n_s32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_n_s32
  #define vmlsq_n_s32(a, b, c) simde_vmlsq_n_s32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_n_u16: 128-bit variant — a - (b * c) with scalar c, eight uint16
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_n_u16(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmlsq_u16(a, b, simde_vdupq_n_u16(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_n_u16
  #define vmlsq_n_u16(a, b, c) simde_vmlsq_n_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsq_n_u32: 128-bit variant — a - (b * c) with scalar c, four uint32
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsq_n_u32(a, b, c);
  #else
    /* Splat the scalar and defer to the vector-vector mls. */
    return simde_vmlsq_u32(a, b, simde_vdupq_n_u32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsq_n_u32
  #define vmlsq_n_u32(a, b, c) simde_vmlsq_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLS_N_H) */
|
||||
124
lib/simd_wrapper/simde/arm/neon/mlsl.h
Normal file
124
lib/simd_wrapper/simde/arm/neon/mlsl.h
Normal file
@@ -0,0 +1,124 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLSL_H)
|
||||
#define SIMDE_ARM_NEON_MLSL_H
|
||||
|
||||
#include "mull.h"
|
||||
#include "sub.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlsl_s8: widening multiply-subtract — a - (b * c), where b and c are
 * widened from int8 to int16 before the multiply (no overflow at 16 bits). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsl_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_s8(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_s16(a, simde_vmull_s8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_s8
  #define vmlsl_s8(a, b, c) simde_vmlsl_s8((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_s16: widening multiply-subtract — a - (b * c), int16 operands
 * widened to int32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_s16(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_s32(a, simde_vmull_s16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_s16
  #define vmlsl_s16(a, b, c) simde_vmlsl_s16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_s32: widening multiply-subtract — a - (b * c), int32 operands
 * widened to int64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_s32(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_s64(a, simde_vmull_s32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_s32
  #define vmlsl_s32(a, b, c) simde_vmlsl_s32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_u8: widening multiply-subtract — a - (b * c), uint8 operands
 * widened to uint16. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsl_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_u8(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_u16(a, simde_vmull_u8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_u8
  #define vmlsl_u8(a, b, c) simde_vmlsl_u8((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_u16: widening multiply-subtract — a - (b * c), uint16 operands
 * widened to uint32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_u16(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_u32(a, simde_vmull_u16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_u16
  #define vmlsl_u16(a, b, c) simde_vmlsl_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_u32: widening multiply-subtract — a - (b * c), uint32 operands
 * widened to uint64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_u32(a, b, c);
  #else
    /* Widening multiply, then a plain vector subtract. */
    return simde_vsubq_u64(a, simde_vmull_u32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_u32
  #define vmlsl_u32(a, b, c) simde_vmlsl_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLSL_H) */
|
||||
124
lib/simd_wrapper/simde/arm/neon/mlsl_high.h
Normal file
124
lib/simd_wrapper/simde/arm/neon/mlsl_high.h
Normal file
@@ -0,0 +1,124 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_MLSL_HIGH_H
|
||||
|
||||
#include "mull_high.h"
|
||||
#include "sub.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlsl_high_s8: widening multiply-subtract using the HIGH halves of the
 * 128-bit b and c vectors — a - (high(b) * high(c)), int8 -> int16.
 * Native only on AArch64 (A64V8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmlsl_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_s8(a, b, c);
  #else
    return simde_vsubq_s16(a, simde_vmull_high_s8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_s8
  #define vmlsl_high_s8(a, b, c) simde_vmlsl_high_s8((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_s16: a - (high(b) * high(c)), int16 -> int32 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_s16(a, b, c);
  #else
    return simde_vsubq_s32(a, simde_vmull_high_s16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_s16
  #define vmlsl_high_s16(a, b, c) simde_vmlsl_high_s16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_s32: a - (high(b) * high(c)), int32 -> int64 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_s32(a, b, c);
  #else
    return simde_vsubq_s64(a, simde_vmull_high_s32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_s32
  #define vmlsl_high_s32(a, b, c) simde_vmlsl_high_s32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_u8: a - (high(b) * high(c)), uint8 -> uint16 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmlsl_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_u8(a, b, c);
  #else
    return simde_vsubq_u16(a, simde_vmull_high_u8(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_u8
  #define vmlsl_high_u8(a, b, c) simde_vmlsl_high_u8((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_u16: a - (high(b) * high(c)), uint16 -> uint32 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_u16(a, b, c);
  #else
    return simde_vsubq_u32(a, simde_vmull_high_u16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_u16
  #define vmlsl_high_u16(a, b, c) simde_vmlsl_high_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_u32: a - (high(b) * high(c)), uint32 -> uint64 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_u32(a, b, c);
  #else
    return simde_vsubq_u64(a, simde_vmull_high_u32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_u32
  #define vmlsl_high_u32(a, b, c) simde_vmlsl_high_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) */
|
||||
128
lib/simd_wrapper/simde/arm/neon/mlsl_high_n.h
Normal file
128
lib/simd_wrapper/simde/arm/neon/mlsl_high_n.h
Normal file
@@ -0,0 +1,128 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Décio Luiz Gazzoni Filho <decio@decpp.net>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H)
|
||||
#define SIMDE_ARM_NEON_MLSL_HIGH_N_H
|
||||
|
||||
#include "movl_high.h"
|
||||
#include "dup_n.h"
|
||||
#include "mls.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlsl_high_n_s16: a - (high(b) * c) with scalar c, int16 -> int32
 * widening. Portable path widens the high half, splats c into int32
 * lanes, and reuses the 128-bit mls. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_n_s16(a, b, c);
  #else
    return simde_vmlsq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_n_s16
  #define vmlsl_high_n_s16(a, b, c) simde_vmlsl_high_n_s16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_n_s32: a - (high(b) * c) with scalar c, int32 -> int64
 * widening. Unlike the 16-bit variant there is no 64-bit vmlsq to defer
 * to, so the fallback computes a - b*c on the private representation. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_n_s32(a, b, c);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), /* widened high half */
      c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c));    /* scalar splat */

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Whole-vector expression when GNU vector extensions are available. */
      r_.values = a_.values - (b_.values * c_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] - (b_.values[i] * c_.values[i]);
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_n_s32
  #define vmlsl_high_n_s32(a, b, c) simde_vmlsl_high_n_s32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_n_u16: a - (high(b) * c) with scalar c, uint16 -> uint32
 * widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_n_u16(a, b, c);
  #else
    /* Widen the high half, splat c, and reuse the 128-bit mls. */
    return simde_vmlsq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_n_u16
  #define vmlsl_high_n_u16(a, b, c) simde_vmlsl_high_n_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_high_n_u32: a - (high(b) * c) with scalar c, uint32 -> uint64
 * widening. As with the signed variant, no 64-bit vmlsq exists, so the
 * fallback operates on the private representation. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmlsl_high_n_u32(a, b, c);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), /* widened high half */
      c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c));    /* scalar splat */

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Whole-vector expression when GNU vector extensions are available. */
      r_.values = a_.values - (b_.values * c_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] - (b_.values[i] * c_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_high_n_u32
  #define vmlsl_high_n_u32(a, b, c) simde_vmlsl_high_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H) */
|
||||
120
lib/simd_wrapper/simde/arm/neon/mlsl_lane.h
Normal file
120
lib/simd_wrapper/simde/arm/neon/mlsl_lane.h
Normal file
@@ -0,0 +1,120 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLSL_LANE_H)
|
||||
#define SIMDE_ARM_NEON_MLSL_LANE_H
|
||||
|
||||
#include "mlsl.h"
|
||||
#include "dup_lane.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlsl_lane_*: widening multiply-subtract where the multiplier is a single
 * lane of a 64-bit vector v, selected by `lane`. Implemented as macros so
 * `lane` remains a compile-time constant for the native intrinsics. The
 * fallback splats the chosen lane and defers to simde_vmlsl_*. */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlsl_lane_s16(a, b, v, lane) vmlsl_lane_s16((a), (b), (v), (lane))
#else
  #define simde_vmlsl_lane_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_lane_s16
  #define vmlsl_lane_s16(a, b, c, lane) simde_vmlsl_lane_s16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlsl_lane_s32(a, b, v, lane) vmlsl_lane_s32((a), (b), (v), (lane))
#else
  #define simde_vmlsl_lane_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_lane_s32
  #define vmlsl_lane_s32(a, b, c, lane) simde_vmlsl_lane_s32((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlsl_lane_u16(a, b, v, lane) vmlsl_lane_u16((a), (b), (v), (lane))
#else
  #define simde_vmlsl_lane_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_lane_u16
  #define vmlsl_lane_u16(a, b, c, lane) simde_vmlsl_lane_u16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmlsl_lane_u32(a, b, v, lane) vmlsl_lane_u32((a), (b), (v), (lane))
#else
  #define simde_vmlsl_lane_u32(a, b, v, lane) simde_vmlsl_u32((a), (b), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_lane_u32
  #define vmlsl_lane_u32(a, b, c, lane) simde_vmlsl_lane_u32((a), (b), (c), (lane))
#endif
|
||||
|
||||
/* vmlsl_laneq_*: same as vmlsl_lane_* but the lane is taken from a 128-bit
 * vector (hence `laneq`); the native forms exist only on AArch64. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlsl_laneq_s16(a, b, v, lane) vmlsl_laneq_s16((a), (b), (v), (lane))
#else
  #define simde_vmlsl_laneq_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_laneq_s16
  #define vmlsl_laneq_s16(a, b, c, lane) simde_vmlsl_laneq_s16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlsl_laneq_s32(a, b, v, lane) vmlsl_laneq_s32((a), (b), (v), (lane))
#else
  #define simde_vmlsl_laneq_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_laneq_s32
  #define vmlsl_laneq_s32(a, b, c, lane) simde_vmlsl_laneq_s32((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlsl_laneq_u16(a, b, v, lane) vmlsl_laneq_u16((a), (b), (v), (lane))
#else
  #define simde_vmlsl_laneq_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_laneq_u16
  #define vmlsl_laneq_u16(a, b, c, lane) simde_vmlsl_laneq_u16((a), (b), (c), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmlsl_laneq_u32(a, b, v, lane) vmlsl_laneq_u32((a), (b), (v), (lane))
#else
  #define simde_vmlsl_laneq_u32(a, b, v, lane) simde_vmlsl_u32((a), (b), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_laneq_u32
  #define vmlsl_laneq_u32(a, b, c, lane) simde_vmlsl_laneq_u32((a), (b), (c), (lane))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLSL_LANE_H) */
|
||||
96
lib/simd_wrapper/simde/arm/neon/mlsl_n.h
Normal file
96
lib/simd_wrapper/simde/arm/neon/mlsl_n.h
Normal file
@@ -0,0 +1,96 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MLSL_N_H)
|
||||
#define SIMDE_ARM_NEON_MLSL_N_H
|
||||
|
||||
#include "mull_n.h"
|
||||
#include "sub.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmlsl_n_s16: widening multiply-subtract with scalar multiplier —
 * a - (b * c), int16 -> int32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_n_s16(a, b, c);
  #else
    /* Widening multiply by the scalar, then a plain vector subtract. */
    return simde_vsubq_s32(a, simde_vmull_n_s16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_n_s16
  #define vmlsl_n_s16(a, b, c) simde_vmlsl_n_s16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_n_s32: a - (b * c) with scalar c, int32 -> int64 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_n_s32(a, b, c);
  #else
    /* Widening multiply by the scalar, then a plain vector subtract. */
    return simde_vsubq_s64(a, simde_vmull_n_s32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_n_s32
  #define vmlsl_n_s32(a, b, c) simde_vmlsl_n_s32((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_n_u16: a - (b * c) with scalar c, uint16 -> uint32 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmlsl_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_n_u16(a, b, c);
  #else
    /* Widening multiply by the scalar, then a plain vector subtract. */
    return simde_vsubq_u32(a, simde_vmull_n_u16(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_n_u16
  #define vmlsl_n_u16(a, b, c) simde_vmlsl_n_u16((a), (b), (c))
#endif
|
||||
|
||||
/* vmlsl_n_u32: a - (b * c) with scalar c, uint32 -> uint64 widening. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmlsl_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmlsl_n_u32(a, b, c);
  #else
    /* Widening multiply by the scalar, then a plain vector subtract. */
    return simde_vsubq_u64(a, simde_vmull_n_u32(b, c));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmlsl_n_u32
  #define vmlsl_n_u32(a, b, c) simde_vmlsl_n_u32((a), (b), (c))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MLSL_N_H) */
|
||||
238
lib/simd_wrapper/simde/arm/neon/movl.h
Normal file
238
lib/simd_wrapper/simde/arm/neon/movl.h
Normal file
@@ -0,0 +1,238 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MOVL_H)
|
||||
#define SIMDE_ARM_NEON_MOVL_H
|
||||
|
||||
#include "combine.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmovl_s8: widen each int8 lane of a 64-bit vector to int16
 * (sign-extension via the cast on each lane). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmovl_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmovl_s8(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* WASM SIMD only has 128-bit vectors: duplicate the 64-bit input into a
     * full vector and extend its low half. */
    simde_int16x8_private r_;
    simde_int8x16_private a_ = simde_int8x16_to_private(simde_vcombine_s8(a, a));

    r_.v128 = wasm_i16x8_extend_low_i8x16(a_.v128);

    return simde_int16x8_from_private(r_);
  #else
    simde_int16x8_private r_;
    simde_int8x8_private a_ = simde_int8x8_to_private(a);

    /* Bulk convert when available (and the GCC 100761 bug is absent),
     * otherwise widen lane by lane. */
    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
      SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]);
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmovl_s8
  #define vmovl_s8(a) simde_vmovl_s8((a))
#endif
|
||||
|
||||
/* vmovl_s16: widen each int16 lane to int32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmovl_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmovl_s16(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* Duplicate the 64-bit input into a 128-bit vector, extend low half. */
    simde_int32x4_private r_;
    simde_int16x8_private a_ = simde_int16x8_to_private(simde_vcombine_s16(a, a));

    r_.v128 = wasm_i32x4_extend_low_i16x8(a_.v128);

    return simde_int32x4_from_private(r_);
  #else
    simde_int32x4_private r_;
    simde_int16x4_private a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
      SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmovl_s16
  #define vmovl_s16(a) simde_vmovl_s16((a))
#endif
|
||||
|
||||
/* vmovl_s32: widen each int32 lane to int64. Note: unlike the 8/16-bit
 * variants, the bulk-convert branch here is not gated on
 * SIMDE_BUG_GCC_100761. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmovl_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmovl_s32(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* Duplicate the 64-bit input into a 128-bit vector, extend low half. */
    simde_int64x2_private r_;
    simde_int32x4_private a_ = simde_int32x4_to_private(simde_vcombine_s32(a, a));

    r_.v128 = wasm_i64x2_extend_low_i32x4(a_.v128);

    return simde_int64x2_from_private(r_);
  #else
    simde_int64x2_private r_;
    simde_int32x2_private a_ = simde_int32x2_to_private(a);

    #if defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]);
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmovl_s32
  #define vmovl_s32(a) simde_vmovl_s32((a))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmovl_u8(simde_uint8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovl_u8(a);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a));
|
||||
|
||||
r_.v128 = wasm_u16x8_extend_low_u8x16(a_.v128);
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#else
|
||||
simde_uint16x8_private r_;
|
||||
simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_u8
|
||||
#define vmovl_u8(a) simde_vmovl_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmovl_u16(simde_uint16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovl_u16(a);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vcombine_u16(a, a));
|
||||
|
||||
r_.v128 = wasm_u32x4_extend_low_u16x8(a_.v128);
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#else
|
||||
simde_uint32x4_private r_;
|
||||
simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_u16
|
||||
#define vmovl_u16(a) simde_vmovl_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vmovl_u32(simde_uint32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovl_u32(a);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vcombine_u32(a, a));
|
||||
|
||||
r_.v128 = wasm_u64x2_extend_low_u32x4(a_.v128);
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_u32
|
||||
#define vmovl_u32(a) simde_vmovl_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MOVL_H) */
|
||||
126
lib/simd_wrapper/simde/arm/neon/movl_high.h
Normal file
126
lib/simd_wrapper/simde/arm/neon/movl_high.h
Normal file
@@ -0,0 +1,126 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MOVL_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_MOVL_HIGH_H
|
||||
|
||||
#include "types.h"
|
||||
#include "movl.h"
|
||||
#include "get_high.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vmovl_high_s8(simde_int8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_s8(a);
|
||||
#else
|
||||
return simde_vmovl_s8(simde_vget_high_s8(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_s8
|
||||
#define vmovl_high_s8(a) simde_vmovl_high_s8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vmovl_high_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_s16(a);
|
||||
#else
|
||||
return simde_vmovl_s16(simde_vget_high_s16(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_s16
|
||||
#define vmovl_high_s16(a) simde_vmovl_high_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vmovl_high_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_s32(a);
|
||||
#else
|
||||
return simde_vmovl_s32(simde_vget_high_s32(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_s32
|
||||
#define vmovl_high_s32(a) simde_vmovl_high_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmovl_high_u8(simde_uint8x16_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_u8(a);
|
||||
#else
|
||||
return simde_vmovl_u8(simde_vget_high_u8(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_u8
|
||||
#define vmovl_high_u8(a) simde_vmovl_high_u8((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmovl_high_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_u16(a);
|
||||
#else
|
||||
return simde_vmovl_u16(simde_vget_high_u16(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_u16
|
||||
#define vmovl_high_u16(a) simde_vmovl_high_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vmovl_high_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovl_high_u32(a);
|
||||
#else
|
||||
return simde_vmovl_u32(simde_vget_high_u32(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovl_high_u32
|
||||
#define vmovl_high_u32(a) simde_vmovl_high_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) */
|
||||
195
lib/simd_wrapper/simde/arm/neon/movn.h
Normal file
195
lib/simd_wrapper/simde/arm/neon/movn.h
Normal file
@@ -0,0 +1,195 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MOVN_H)
|
||||
#define SIMDE_ARM_NEON_MOVN_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vmovn_s16(simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_s16(a);
|
||||
#else
|
||||
simde_int8x8_private r_;
|
||||
simde_int16x8_private a_ = simde_int16x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_s16
|
||||
#define vmovn_s16(a) simde_vmovn_s16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmovn_s32(simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_s32(a);
|
||||
#else
|
||||
simde_int16x4_private r_;
|
||||
simde_int32x4_private a_ = simde_int32x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_s32
|
||||
#define vmovn_s32(a) simde_vmovn_s32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmovn_s64(simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_s64(a);
|
||||
#else
|
||||
simde_int32x2_private r_;
|
||||
simde_int64x2_private a_ = simde_int64x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_s64
|
||||
#define vmovn_s64(a) simde_vmovn_s64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vmovn_u16(simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_u16(a);
|
||||
#else
|
||||
simde_uint8x8_private r_;
|
||||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_u16
|
||||
#define vmovn_u16(a) simde_vmovn_u16((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vmovn_u32(simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_u32(a);
|
||||
#else
|
||||
simde_uint16x4_private r_;
|
||||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_u32
|
||||
#define vmovn_u32(a) simde_vmovn_u32((a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vmovn_u64(simde_uint64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmovn_u64(a);
|
||||
#else
|
||||
simde_uint32x2_private r_;
|
||||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_)
|
||||
SIMDE_CONVERT_VECTOR_(r_.values, a_.values);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_u64
|
||||
#define vmovn_u64(a) simde_vmovn_u64((a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MOVN_H) */
|
||||
125
lib/simd_wrapper/simde/arm/neon/movn_high.h
Normal file
125
lib/simd_wrapper/simde/arm/neon/movn_high.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MOVN_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_MOVN_HIGH_H
|
||||
|
||||
#include "types.h"
|
||||
#include "movn.h"
|
||||
#include "combine.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_s16(r, a);
|
||||
#else
|
||||
return simde_vcombine_s8(r, simde_vmovn_s16(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_s16
|
||||
#define vmovn_high_s16(r, a) simde_vmovn_high_s16((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_s32(r, a);
|
||||
#else
|
||||
return simde_vcombine_s16(r, simde_vmovn_s32(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_s32
|
||||
#define vmovn_high_s32(r, a) simde_vmovn_high_s32((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_s64(r, a);
|
||||
#else
|
||||
return simde_vcombine_s32(r, simde_vmovn_s64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_s64
|
||||
#define vmovn_high_s64(r, a) simde_vmovn_high_s64((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_u16(r, a);
|
||||
#else
|
||||
return simde_vcombine_u8(r, simde_vmovn_u16(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_u16
|
||||
#define vmovn_high_u16(r, a) simde_vmovn_high_u16((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_u32(r, a);
|
||||
#else
|
||||
return simde_vcombine_u16(r, simde_vmovn_u32(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_u32
|
||||
#define vmovn_high_u32(r, a) simde_vmovn_high_u32((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmovn_high_u64(r, a);
|
||||
#else
|
||||
return simde_vcombine_u32(r, simde_vmovn_u64(a));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmovn_high_u64
|
||||
#define vmovn_high_u64(r, a) simde_vmovn_high_u64((r), (a))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) */
|
||||
579
lib/simd_wrapper/simde/arm/neon/mul.h
Normal file
579
lib/simd_wrapper/simde/arm/neon/mul.h
Normal file
@@ -0,0 +1,579 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MUL_H)
|
||||
#define SIMDE_ARM_NEON_MUL_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vmul_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_f32(a, b);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a),
|
||||
b_ = simde_float32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_f32
|
||||
#define vmul_f32(a, b) simde_vmul_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vmul_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vmul_f64(a, b);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a),
|
||||
b_ = simde_float64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_f64
|
||||
#define vmul_f64(a, b) simde_vmul_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vmul_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_s8
|
||||
#define vmul_s8(a, b) simde_vmul_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vmul_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _m_pmullw(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_s16
|
||||
#define vmul_s16(a, b) simde_vmul_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vmul_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_s32
|
||||
#define vmul_s32(a, b) simde_vmul_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_x_vmul_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vmul_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_u8
|
||||
#define vmul_u8(a, b) simde_vmul_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vmul_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_u16
|
||||
#define vmul_u16(a, b) simde_vmul_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vmul_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmul_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
|
||||
r_.values = a_.values * b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] * b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmul_u32
|
||||
#define vmul_u32(a, b) simde_vmul_u32((a), (b))
|
||||
#endif
|
||||
|
||||
/* simde-internal helper (x_ prefix: no corresponding NEON intrinsic exists).
 * Element-wise multiply of two 64-bit-lane unsigned vectors (1 lane). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_x_vmul_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  simde_uint64x1_private
    r_,
    a_ = simde_uint64x1_to_private(a),
    b_ = simde_uint64x1_to_private(b);

  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
    r_.values = a_.values * b_.values;
  #else
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] * b_.values[i];
    }
  #endif

  return simde_uint64x1_from_private(r_);
}

/* vmulq_f32: element-wise multiply of two float32x4 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_f32(a, b);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);

    #if defined(SIMDE_X86_SSE_NATIVE)
      r_.m128 = _mm_mul_ps(a_.m128, b_.m128);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_mul(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values * b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] * b_.values[i];
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_f32
  #define vmulq_f32(a, b) simde_vmulq_f32((a), (b))
#endif

/* vmulq_f64: element-wise multiply of two float64x2 vectors (AArch64-only intrinsic). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmulq_f64(a, b);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128d = _mm_mul_pd(a_.m128d, b_.m128d);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_mul(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values * b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] * b_.values[i];
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_f64
  #define vmulq_f64(a, b) simde_vmulq_f64((a), (b))
#endif

/* vmulq_s8: element-wise multiply of two int8x16 vectors.
 * SSE2 has no 8-bit multiply, so odd/even byte lanes are computed with two
 * 16-bit multiplies and recombined. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmulq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_s8(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_mul(a, b);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      /* https://stackoverflow.com/a/29155682/501126 */
      const __m128i dst_even = _mm_mullo_epi16(a_.m128i, b_.m128i);
      r_.m128i =
        _mm_or_si128(
          _mm_slli_epi16(
            _mm_mullo_epi16(
              _mm_srli_epi16(a_.m128i, 8),
              _mm_srli_epi16(b_.m128i, 8)
            ),
            8
          ),
          #if defined(SIMDE_X86_AVX2_NATIVE)
            _mm_and_si128(dst_even, _mm_set1_epi16(0xFF))
          #else
            _mm_srli_epi16(
              _mm_slli_epi16(dst_even, 8),
              8
            )
          #endif
        );
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values * b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] * b_.values[i];
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_s8
  #define vmulq_s8(a, b) simde_vmulq_s8((a), (b))
#endif

/* vmulq_s16: element-wise multiply of two int16x8 vectors (low 16 bits of each product). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_s16(a, b);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_mullo_epi16(a_.m128i, b_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values * b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] * b_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_s16
  #define vmulq_s16(a, b) simde_vmulq_s16((a), (b))
#endif

/* vmulq_s32: element-wise multiply of two int32x4 vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_s32(a, b);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_mul(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values * b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] * b_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_s32
  #define vmulq_s32(a, b) simde_vmulq_s32((a), (b))
#endif

/* simde-internal helper: element-wise multiply of two int64x2 vectors
 * (no NEON intrinsic; needs AVX-512VL+DQ for a single x86 instruction). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_x_vmulq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  simde_int64x2_private
    r_,
    a_ = simde_int64x2_to_private(a),
    b_ = simde_int64x2_to_private(b);

  #if defined(SIMDE_WASM_SIMD128_NATIVE)
    r_.v128 = wasm_i64x2_mul(a_.v128, b_.v128);
  #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE)
    r_.m128i = _mm_mullo_epi64(a_.m128i, b_.m128i);
  #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
    r_.values = a_.values * b_.values;
  #else
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] * b_.values[i];
    }
  #endif

  return simde_int64x2_from_private(r_);
}

/* vmulq_u8: unsigned 8-bit multiply, delegated to the signed version —
 * low-8-bit products of same-width values are bit-identical signed vs. unsigned. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmulq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_u8(a, b);
  #else
    return
      simde_vreinterpretq_u8_s8(
        simde_vmulq_s8(
          simde_vreinterpretq_s8_u8(a),
          simde_vreinterpretq_s8_u8(b)
        )
      );
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_u8
  #define vmulq_u8(a, b) simde_vmulq_u8((a), (b))
#endif

/* vmulq_u16: unsigned 16-bit multiply via the signed implementation (see vmulq_u8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_u16(a, b);
  #else
    return
      simde_vreinterpretq_u16_s16(
        simde_vmulq_s16(
          simde_vreinterpretq_s16_u16(a),
          simde_vreinterpretq_s16_u16(b)
        )
      );
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_u16
  #define vmulq_u16(a, b) simde_vmulq_u16((a), (b))
#endif

/* vmulq_u32: unsigned 32-bit multiply via the signed implementation (see vmulq_u8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_u32(a, b);
  #else
    return
      simde_vreinterpretq_u32_s32(
        simde_vmulq_s32(
          simde_vreinterpretq_s32_u32(a),
          simde_vreinterpretq_s32_u32(b)
        )
      );
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_u32
  #define vmulq_u32(a, b) simde_vmulq_u32((a), (b))
#endif

/* simde-internal helper: unsigned 64-bit multiply via the signed x_ helper. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_x_vmulq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  return
    simde_vreinterpretq_u64_s64(
      simde_x_vmulq_s64(
        simde_vreinterpretq_s64_u64(a),
        simde_vreinterpretq_s64_u64(b)
      )
    );
}

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_MUL_H) */
|
||||
/* ==== begin lib/simd_wrapper/simde/arm/neon/mul_lane.h (new file, 695 lines) ==== */
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MUL_LANE_H)
|
||||
#define SIMDE_ARM_NEON_MUL_LANE_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* vmuld_lane_f64: scalar double times lane 0 of a float64x1 vector.
 * The clang<11 branch below suppresses a spurious vector-conversion diagnostic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vmuld_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
  return a * simde_float64x1_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
    #define simde_vmuld_lane_f64(a, b, lane) \
      SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_lane_f64(a, b, lane))
  #else
    #define simde_vmuld_lane_f64(a, b, lane) vmuld_lane_f64((a), (b), (lane))
  #endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmuld_lane_f64
  #define vmuld_lane_f64(a, b, lane) simde_vmuld_lane_f64(a, b, lane)
#endif

/* vmuld_laneq_f64: scalar double times a selected lane of a float64x2 vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vmuld_laneq_f64(simde_float64_t a, simde_float64x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  return a * simde_float64x2_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
    #define simde_vmuld_laneq_f64(a, b, lane) \
      SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_laneq_f64(a, b, lane))
  #else
    #define simde_vmuld_laneq_f64(a, b, lane) vmuld_laneq_f64((a), (b), (lane))
  #endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmuld_laneq_f64
  #define vmuld_laneq_f64(a, b, lane) simde_vmuld_laneq_f64(a, b, lane)
#endif

/* vmuls_lane_f32: scalar float times a selected lane of a float32x2 vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmuls_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  return a * simde_float32x2_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
    #define simde_vmuls_lane_f32(a, b, lane) \
      SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_lane_f32(a, b, lane))
  #else
    #define simde_vmuls_lane_f32(a, b, lane) vmuls_lane_f32((a), (b), (lane))
  #endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmuls_lane_f32
  #define vmuls_lane_f32(a, b, lane) simde_vmuls_lane_f32(a, b, lane)
#endif

/* vmuls_laneq_f32: scalar float times a selected lane of a float32x4 vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vmuls_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  return a * simde_float32x4_to_private(b).values[lane];
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
    #define simde_vmuls_laneq_f32(a, b, lane) \
      SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_laneq_f32(a, b, lane))
  #else
    #define simde_vmuls_laneq_f32(a, b, lane) vmuls_laneq_f32((a), (b), (lane))
  #endif
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmuls_laneq_f32
  #define vmuls_laneq_f32(a, b, lane) simde_vmuls_laneq_f32(a, b, lane)
#endif

/* vmul_lane_f32: multiply every element of a by the selected lane of b (64-bit vectors). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_float32x2_private
    r_,
    a_ = simde_float32x2_to_private(a),
    b_ = simde_float32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_f32
  #define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane))
#endif

/* vmul_lane_f64: multiply a float64x1 by lane 0 of another float64x1. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
  simde_float64x1_private
    r_,
    a_ = simde_float64x1_to_private(a),
    b_ = simde_float64x1_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float64x1_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_f64
  #define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane))
#endif

/* vmul_lane_s16: multiply every element of a by the selected lane of b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_int16x4_private
    r_,
    a_ = simde_int16x4_to_private(a),
    b_ = simde_int16x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_s16
  #define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane))
#endif

/* vmul_lane_s32: multiply every element of a by the selected lane of b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_int32x2_private
    r_,
    a_ = simde_int32x2_to_private(a),
    b_ = simde_int32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_s32
  #define vmul_lane_s32(a, b, lane) simde_vmul_lane_s32((a), (b), (lane))
#endif

/* vmul_lane_u16: multiply every element of a by the selected lane of b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_uint16x4_private
    r_,
    a_ = simde_uint16x4_to_private(a),
    b_ = simde_uint16x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_u16
  #define vmul_lane_u16(a, b, lane) simde_vmul_lane_u16((a), (b), (lane))
#endif

/* vmul_lane_u32: multiply every element of a by the selected lane of b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_uint32x2_private
    r_,
    a_ = simde_uint32x2_to_private(a),
    b_ = simde_uint32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_lane_u32
  #define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane))
#endif

/* vmul_laneq_s16: 64-bit result vector, lane selected from a 128-bit b ("laneq"). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_laneq_s16(simde_int16x4_t a, simde_int16x8_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_int16x4_private
    r_,
    a_ = simde_int16x4_to_private(a);
  simde_int16x8_private
    b_ = simde_int16x8_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmul_laneq_s16(a, b, lane) vmul_laneq_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_laneq_s16
  #define vmul_laneq_s16(a, b, lane) simde_vmul_laneq_s16((a), (b), (lane))
#endif

/* vmul_laneq_s32: 64-bit result vector, lane selected from a 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_laneq_s32(simde_int32x2_t a, simde_int32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_int32x2_private
    r_,
    a_ = simde_int32x2_to_private(a);
  simde_int32x4_private
    b_ = simde_int32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmul_laneq_s32(a, b, lane) vmul_laneq_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_laneq_s32
  #define vmul_laneq_s32(a, b, lane) simde_vmul_laneq_s32((a), (b), (lane))
#endif

/* vmul_laneq_u16: 64-bit result vector, lane selected from a 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_laneq_u16(simde_uint16x4_t a, simde_uint16x8_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_uint16x4_private
    r_,
    a_ = simde_uint16x4_to_private(a);
  simde_uint16x8_private
    b_ = simde_uint16x8_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint16x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmul_laneq_u16(a, b, lane) vmul_laneq_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_laneq_u16
  #define vmul_laneq_u16(a, b, lane) simde_vmul_laneq_u16((a), (b), (lane))
#endif

/* vmul_laneq_u32: 64-bit result vector, lane selected from a 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_laneq_u32(simde_uint32x2_t a, simde_uint32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_uint32x2_private
    r_,
    a_ = simde_uint32x2_to_private(a);
  simde_uint32x4_private
    b_ = simde_uint32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmul_laneq_u32(a, b, lane) vmul_laneq_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_laneq_u32
  #define vmul_laneq_u32(a, b, lane) simde_vmul_laneq_u32((a), (b), (lane))
#endif
|
||||
|
||||
/* vmulq_lane_f32: multiply every element of 128-bit a by a lane of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_float32x4_private
    r_,
    a_ = simde_float32x4_to_private(a);
  simde_float32x2_private b_ = simde_float32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_f32
  #define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane))
#endif

/* vmulq_lane_f64: multiply every element of 128-bit a by lane 0 of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
  simde_float64x2_private
    r_,
    a_ = simde_float64x2_to_private(a);
  simde_float64x1_private b_ = simde_float64x1_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float64x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_f64
  #define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane))
#endif

/* vmulq_lane_s16: multiply every element of 128-bit a by a lane of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_int16x8_private
    r_,
    a_ = simde_int16x8_to_private(a);
  simde_int16x4_private b_ = simde_int16x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_s16
  #define vmulq_lane_s16(a, b, lane) simde_vmulq_lane_s16((a), (b), (lane))
#endif

/* vmulq_lane_s32: multiply every element of 128-bit a by a lane of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_int32x4_private
    r_,
    a_ = simde_int32x4_to_private(a);
  simde_int32x2_private b_ = simde_int32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_s32
  #define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane))
#endif

/* vmulq_lane_u16: multiply every element of 128-bit a by a lane of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_uint16x8_private
    r_,
    a_ = simde_uint16x8_to_private(a);
  simde_uint16x4_private b_ = simde_uint16x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_u16
  #define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane))
#endif

/* vmulq_lane_u32: multiply every element of 128-bit a by a lane of 64-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_uint32x4_private
    r_,
    a_ = simde_uint32x4_to_private(a);
  simde_uint32x2_private b_ = simde_uint32x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_lane_u32
  #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane))
#endif

/* vmulq_laneq_f32: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_float32x4_private
    r_,
    a_ = simde_float32x4_to_private(a),
    b_ = simde_float32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_f32
  #define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane))
#endif

/* vmulq_laneq_f64: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_float64x2_private
    r_,
    a_ = simde_float64x2_to_private(a),
    b_ = simde_float64x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float64x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_f64
  #define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane))
#endif

/* vmulq_laneq_s16: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_int16x8_private
    r_,
    a_ = simde_int16x8_to_private(a),
    b_ = simde_int16x8_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_s16(a, b, lane) vmulq_laneq_s16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_s16
  #define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane))
#endif

/* vmulq_laneq_s32: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_int32x4_private
    r_,
    a_ = simde_int32x4_to_private(a),
    b_ = simde_int32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_int32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_s32
  #define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane))
#endif

/* vmulq_laneq_u16: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  simde_uint16x8_private
    r_,
    a_ = simde_uint16x8_to_private(a),
    b_ = simde_uint16x8_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint16x8_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_u16
  #define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane))
#endif

/* vmulq_laneq_u32: multiply every element of 128-bit a by a lane of 128-bit b. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_uint32x4_private
    r_,
    a_ = simde_uint32x4_to_private(a),
    b_ = simde_uint32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_laneq_u32
  #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane))
#endif
|
||||
|
||||
/* Multiply each element of the 64-bit vector `a` by the element of the
 * 128-bit vector `b` selected by `lane` (constant in 0..3).  Note the mixed
 * widths: `a`/result are float32x2, `b` is float32x4 — hence the separate
 * private types below.  Portable fallback for AArch64 vmul_laneq_f32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  simde_float32x2_private
    r_,
    a_ = simde_float32x2_to_private(a);
  simde_float32x4_private b_ = simde_float32x4_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float32x2_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  /* On native AArch64 NEON, shadow the function with the real intrinsic. */
  #define simde_vmul_laneq_f32(a, b, lane) vmul_laneq_f32((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  /* Expose the un-prefixed NEON name as an alias of the SIMDe version. */
  #undef vmul_laneq_f32
  #define vmul_laneq_f32(a, b, lane) simde_vmul_laneq_f32((a), (b), (lane))
#endif
|
||||
|
||||
/* Multiply the single element of `a` (float64x1) by the element of the
 * 128-bit vector `b` selected by `lane` (constant 0 or 1).  Portable
 * fallback for AArch64 vmul_laneq_f64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_laneq_f64(simde_float64x1_t a, simde_float64x2_t b, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  simde_float64x1_private
    r_,
    a_ = simde_float64x1_to_private(a);
  simde_float64x2_private b_ = simde_float64x2_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
    r_.values[i] = a_.values[i] * b_.values[lane];
  }

  return simde_float64x1_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  /* On native AArch64 NEON, shadow the function with the real intrinsic. */
  #define simde_vmul_laneq_f64(a, b, lane) vmul_laneq_f64((a), (b), (lane))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  /* Expose the un-prefixed NEON name as an alias of the SIMDe version. */
  #undef vmul_laneq_f64
  #define vmul_laneq_f64(a, b, lane) simde_vmul_laneq_f64((a), (b), (lane))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MUL_LANE_H) */
|
||||
210
lib/simd_wrapper/simde/arm/neon/mul_n.h
Normal file
210
lib/simd_wrapper/simde/arm/neon/mul_n.h
Normal file
@@ -0,0 +1,210 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MUL_N_H)
|
||||
#define SIMDE_ARM_NEON_MUL_N_H
|
||||
|
||||
#include "types.h"
|
||||
#include "mul.h"
|
||||
#include "dup_n.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* ---- vmul_n / vmulq_n family ------------------------------------------
 * Multiply every element of a vector by the scalar `b`.  Each wrapper
 * forwards to the native NEON intrinsic when available; otherwise it is
 * implemented as vector * splat(b) using the SIMDe vmul and vdup_n
 * helpers.  The f64 variants are gated on A64V8 because vmul_n_f64 /
 * vmulq_n_f64 only exist on AArch64; all other variants are ARMv7 NEON. */

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vmul_n_f32(simde_float32x2_t a, simde_float32 b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmul_n_f32(a, b);
  #else
    return simde_vmul_f32(a, simde_vdup_n_f32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_f32
  #define vmul_n_f32(a, b) simde_vmul_n_f32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vmul_n_f64(simde_float64x1_t a, simde_float64 b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmul_n_f64(a, b);
  #else
    return simde_vmul_f64(a, simde_vdup_n_f64(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_f64
  #define vmul_n_f64(a, b) simde_vmul_n_f64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmul_n_s16(simde_int16x4_t a, int16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmul_n_s16(a, b);
  #else
    return simde_vmul_s16(a, simde_vdup_n_s16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_s16
  #define vmul_n_s16(a, b) simde_vmul_n_s16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmul_n_s32(simde_int32x2_t a, int32_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmul_n_s32(a, b);
  #else
    return simde_vmul_s32(a, simde_vdup_n_s32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_s32
  #define vmul_n_s32(a, b) simde_vmul_n_s32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmul_n_u16(simde_uint16x4_t a, uint16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmul_n_u16(a, b);
  #else
    return simde_vmul_u16(a, simde_vdup_n_u16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_u16
  #define vmul_n_u16(a, b) simde_vmul_n_u16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmul_n_u32(simde_uint32x2_t a, uint32_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmul_n_u32(a, b);
  #else
    return simde_vmul_u32(a, simde_vdup_n_u32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmul_n_u32
  #define vmul_n_u32(a, b) simde_vmul_n_u32((a), (b))
#endif

/* 128-bit ("q") variants of the same scalar-multiply pattern. */

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vmulq_n_f32(simde_float32x4_t a, simde_float32 b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_n_f32(a, b);
  #else
    return simde_vmulq_f32(a, simde_vdupq_n_f32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_f32
  #define vmulq_n_f32(a, b) simde_vmulq_n_f32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vmulq_n_f64(simde_float64x2_t a, simde_float64 b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmulq_n_f64(a, b);
  #else
    return simde_vmulq_f64(a, simde_vdupq_n_f64(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_f64
  #define vmulq_n_f64(a, b) simde_vmulq_n_f64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmulq_n_s16(simde_int16x8_t a, int16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_n_s16(a, b);
  #else
    return simde_vmulq_s16(a, simde_vdupq_n_s16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_s16
  #define vmulq_n_s16(a, b) simde_vmulq_n_s16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmulq_n_s32(simde_int32x4_t a, int32_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_n_s32(a, b);
  #else
    return simde_vmulq_s32(a, simde_vdupq_n_s32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_s32
  #define vmulq_n_s32(a, b) simde_vmulq_n_s32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmulq_n_u16(simde_uint16x8_t a, uint16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_n_u16(a, b);
  #else
    return simde_vmulq_u16(a, simde_vdupq_n_u16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_u16
  #define vmulq_n_u16(a, b) simde_vmulq_n_u16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmulq_n_u32(simde_uint32x4_t a, uint32_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmulq_n_u32(a, b);
  #else
    return simde_vmulq_u32(a, simde_vdupq_n_u32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmulq_n_u32
  #define vmulq_n_u32(a, b) simde_vmulq_n_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MUL_N_H) */
|
||||
236
lib/simd_wrapper/simde/arm/neon/mull.h
Normal file
236
lib/simd_wrapper/simde/arm/neon/mull.h
Normal file
@@ -0,0 +1,236 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MULL_H)
|
||||
#define SIMDE_ARM_NEON_MULL_H
|
||||
|
||||
#include "types.h"
|
||||
#include "mul.h"
|
||||
#include "movl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* ---- vmull family ------------------------------------------------------
 * Widening multiply: each 8/16/32-bit lane pair is multiplied in a result
 * type twice as wide, so the product cannot overflow.  Dispatch order:
 *   1. native ARMv7 NEON intrinsic;
 *   2. (8/16-bit only) widen both operands with vmovl and use the 128-bit
 *      multiply, when the target has >= 128-bit natural vectors;
 *   3. scalar fallback, preferring a whole-vector convert+multiply via
 *      SIMDE_CONVERT_VECTOR_ when the compiler supports it (skipped under
 *      SIMDE_BUG_GCC_100761, a GCC miscompilation workaround), else a
 *      plain SIMDE_VECTORIZE loop with explicit widening casts. */

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmull_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_s8(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vmulq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b));
  #else
    simde_int16x8_private r_;
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen before multiplying so the product is computed at int16. */
        r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) * HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_s8
  #define vmull_s8(a, b) simde_vmull_s8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_s16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vmulq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b));
  #else
    simde_int32x4_private r_;
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b_.values[i]);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_s16
  #define vmull_s16(a, b) simde_vmull_s16((a), (b))
#endif

/* 32->64-bit variant: no 128-bit vmovl shortcut branch here. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_s32(a, b);
  #else
    simde_int64x2_private r_;
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_s32
  #define vmull_s32(a, b) simde_vmull_s32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmull_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_u8(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vmulq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b));
  #else
    simde_uint16x8_private r_;
    simde_uint8x8_private
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.values[i]);
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_u8
  #define vmull_u8(a, b) simde_vmull_u8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_u16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vmulq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b));
  #else
    simde_uint32x4_private r_;
    simde_uint16x4_private
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_u16
  #define vmull_u16(a, b) simde_vmull_u16((a), (b))
#endif

/* 32->64-bit variant: no 128-bit vmovl shortcut branch here. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmull_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_u32(a, b);
  #else
    simde_uint64x2_private r_;
    simde_uint32x2_private
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      __typeof__(r_.values) av, bv;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      SIMDE_CONVERT_VECTOR_(bv, b_.values);
      r_.values = av * bv;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.values[i]);
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_u32
  #define vmull_u32(a, b) simde_vmull_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MULL_H) */
|
||||
125
lib/simd_wrapper/simde/arm/neon/mull_high.h
Normal file
125
lib/simd_wrapper/simde/arm/neon/mull_high.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MULL_HIGH_H)
|
||||
#define SIMDE_ARM_NEON_MULL_HIGH_H
|
||||
|
||||
#include "types.h"
|
||||
#include "mul.h"
|
||||
#include "movl_high.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* ---- vmull_high family -------------------------------------------------
 * Widening multiply of the HIGH halves of two 128-bit vectors.  AArch64
 * provides native intrinsics; the portable path widens each high half via
 * vmovl_high and multiplies at the wider width.  The 32->64-bit variants
 * use the SIMDe-internal simde_x_vmulq_* 64-bit multiply helpers. */

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmull_high_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_s8(a, b);
  #else
    return simde_vmulq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_s8
  #define vmull_high_s8(a, b) simde_vmull_high_s8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_high_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_s16(a, b);
  #else
    return simde_vmulq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_s16
  #define vmull_high_s16(a, b) simde_vmull_high_s16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_high_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_s32(a, b);
  #else
    return simde_x_vmulq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_s32
  #define vmull_high_s32(a, b) simde_vmull_high_s32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmull_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_u8(a, b);
  #else
    return simde_vmulq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_u8
  #define vmull_high_u8(a, b) simde_vmull_high_u8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_u16(a, b);
  #else
    return simde_vmulq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_u16
  #define vmull_high_u16(a, b) simde_vmull_high_u16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vmull_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vmull_high_u32(a, b);
  #else
    return simde_x_vmulq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_high_u32
  #define vmull_high_u32(a, b) simde_vmull_high_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MULL_HIGH_H) */
|
||||
120
lib/simd_wrapper/simde/arm/neon/mull_lane.h
Normal file
120
lib/simd_wrapper/simde/arm/neon/mull_lane.h
Normal file
@@ -0,0 +1,120 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2021 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MULL_LANE_H)
|
||||
#define SIMDE_ARM_NEON_MULL_LANE_H
|
||||
|
||||
#include "mull.h"
|
||||
#include "dup_lane.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* ---- vmull_lane / vmull_laneq macros -----------------------------------
 * Widening multiply by a single lane of the second operand.  These are
 * macros (not functions) because `lane` must remain a compile-time
 * constant for the native intrinsics.  Portable form: broadcast the lane
 * with vdup_lane / vdup_laneq, then do a full vmull.
 * _lane  variants take a 64-bit `v` and are available from ARMv7 NEON;
 * _laneq variants take a 128-bit `v` and are AArch64-only natively. */

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmull_lane_s16(a, v, lane) vmull_lane_s16((a), (v), (lane))
#else
  #define simde_vmull_lane_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_lane_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_lane_s16
  #define vmull_lane_s16(a, v, lane) simde_vmull_lane_s16((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmull_lane_s32(a, v, lane) vmull_lane_s32((a), (v), (lane))
#else
  #define simde_vmull_lane_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_lane_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_lane_s32
  #define vmull_lane_s32(a, v, lane) simde_vmull_lane_s32((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmull_lane_u16(a, v, lane) vmull_lane_u16((a), (v), (lane))
#else
  #define simde_vmull_lane_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_lane_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_lane_u16
  #define vmull_lane_u16(a, v, lane) simde_vmull_lane_u16((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_vmull_lane_u32(a, v, lane) vmull_lane_u32((a), (v), (lane))
#else
  #define simde_vmull_lane_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_lane_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_lane_u32
  #define vmull_lane_u32(a, v, lane) simde_vmull_lane_u32((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmull_laneq_s16(a, v, lane) vmull_laneq_s16((a), (v), (lane))
#else
  #define simde_vmull_laneq_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_laneq_s16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_laneq_s16
  #define vmull_laneq_s16(a, v, lane) simde_vmull_laneq_s16((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmull_laneq_s32(a, v, lane) vmull_laneq_s32((a), (v), (lane))
#else
  #define simde_vmull_laneq_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_laneq_s32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_laneq_s32
  #define vmull_laneq_s32(a, v, lane) simde_vmull_laneq_s32((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmull_laneq_u16(a, v, lane) vmull_laneq_u16((a), (v), (lane))
#else
  #define simde_vmull_laneq_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_laneq_u16((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_laneq_u16
  #define vmull_laneq_u16(a, v, lane) simde_vmull_laneq_u16((a), (v), (lane))
#endif

#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vmull_laneq_u32(a, v, lane) vmull_laneq_u32((a), (v), (lane))
#else
  #define simde_vmull_laneq_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_laneq_u32((v), (lane)))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vmull_laneq_u32
  #define vmull_laneq_u32(a, v, lane) simde_vmull_laneq_u32((a), (v), (lane))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MULL_LANE_H) */
|
||||
158
lib/simd_wrapper/simde/arm/neon/mull_n.h
Normal file
158
lib/simd_wrapper/simde/arm/neon/mull_n.h
Normal file
@@ -0,0 +1,158 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MULL_N_H)
|
||||
#define SIMDE_ARM_NEON_MULL_N_H
|
||||
|
||||
#include "types.h"
|
||||
#include "mul_n.h"
|
||||
#include "movl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Widening multiply by scalar: each int16 lane of `a` times `b`, computed
 * at int32 so the product cannot overflow.  Falls back to a widened
 * vector multiply (vmovl + vmulq_n) on >=128-bit targets, else scalar. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmull_n_s16(simde_int16x4_t a, int16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_n_s16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vmulq_n_s32(simde_vmovl_s16(a), b);
  #else
    simde_int32x4_private r_;
    simde_int16x4_private a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
      /* Whole-vector convert then multiply; gated off under a GCC bug
       * workaround (SIMDE_BUG_GCC_100761). */
      __typeof__(r_.values) av;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      r_.values = av * b;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen both factors before multiplying. */
        r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_n_s16
  #define vmull_n_s16(a, b) simde_vmull_n_s16((a), (b))
#endif
|
||||
|
||||
/* Widening multiply by scalar: each signed 32-bit lane of a times b,
 * producing signed 64-bit lanes (NEON vmull_n_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vmull_n_s32(simde_int32x2_t a, int32_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_n_s32(a, b);
  #else
    simde_int64x2_private r_;
    simde_int32x2_private a_ = simde_int32x2_to_private(a);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
      /* Vector-extension path: convert lanes to int64, then scale by b. */
      __typeof__(r_.values) av;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      r_.values = av * b;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen both operands before multiplying; int32*int32 fits in int64. */
        r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b);
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_n_s32
  #define vmull_n_s32(a, b) simde_vmull_n_s32((a), (b))
#endif
|
||||
|
||||
/* Widening multiply by scalar: each unsigned 16-bit lane of a times b,
 * producing unsigned 32-bit lanes (NEON vmull_n_u16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmull_n_u16(simde_uint16x4_t a, uint16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmull_n_u16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    /* On 128-bit-capable targets: widen first, then multiply in one shot. */
    return simde_vmulq_n_u32(simde_vmovl_u16(a), b);
  #else
    simde_uint32x4_private r_;
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);

    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
      /* Vector-extension path: convert lanes to uint32, then scale by b. */
      __typeof__(r_.values) av;
      SIMDE_CONVERT_VECTOR_(av, a_.values);
      r_.values = av * b;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Widen both operands so the full 32-bit product is kept. */
        r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmull_n_u16
  #define vmull_n_u16(a, b) simde_vmull_n_u16((a), (b))
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vmull_n_u32(simde_uint32x2_t a, uint32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vmull_n_u32(a, b);
|
||||
#else
|
||||
simde_uint64x2_private r_;
|
||||
simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
__typeof__(r_.values) av;
|
||||
SIMDE_CONVERT_VECTOR_(av, a_.values);
|
||||
r_.values = av * b;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vmull_n_u32
|
||||
#define vmull_n_u32(a, b) simde_vmull_n_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_MULL_N_H) */
|
||||
426
lib/simd_wrapper/simde/arm/neon/mvn.h
Normal file
426
lib/simd_wrapper/simde/arm/neon/mvn.h
Normal file
@@ -0,0 +1,426 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_MVN_H)
|
||||
#define SIMDE_ARM_NEON_MVN_H
|
||||
|
||||
#include "combine.h"
|
||||
#include "get_low.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit signed 8-bit vector (NEON vmvnq_s8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vmvnq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_s8(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_s8
  #define vmvnq_s8(a) simde_vmvnq_s8(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit signed 16-bit vector (NEON vmvnq_s16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vmvnq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_s16(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_s16
  #define vmvnq_s16(a) simde_vmvnq_s16(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit signed 32-bit vector (NEON vmvnq_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vmvnq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_s32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_s32
  #define vmvnq_s32(a) simde_vmvnq_s32(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit unsigned 8-bit vector (NEON vmvnq_u8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vmvnq_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_u8(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_u8
  #define vmvnq_u8(a) simde_vmvnq_u8(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit unsigned 16-bit vector (NEON vmvnq_u16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vmvnq_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_u16(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_u16
  #define vmvnq_u16(a) simde_vmvnq_u16(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 128-bit unsigned 32-bit vector (NEON vmvnq_u32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vmvnq_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvnq_u32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_nor(a, a); /* NOR(a, a) == NOT a */
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a);

    #if defined(SIMDE_X86_AVX512VL_NATIVE)
      /* Truth table 0x55 selects ~a. */
      r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_not(a_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvnq_u32
  #define vmvnq_u32(a) simde_vmvnq_u32(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit signed 8-bit vector (NEON vmvn_s8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vmvn_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_s8(a);
  #else
    simde_int8x8_private
      r_,
      a_ = simde_int8x8_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_s8
  #define vmvn_s8(a) simde_vmvn_s8(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit signed 16-bit vector (NEON vmvn_s16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vmvn_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_s16(a);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_s16
  #define vmvn_s16(a) simde_vmvn_s16(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit signed 32-bit vector (NEON vmvn_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vmvn_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_s32(a);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_s32
  #define vmvn_s32(a) simde_vmvn_s32(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit unsigned 8-bit vector (NEON vmvn_u8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vmvn_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_u8(a);
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_u8
  #define vmvn_u8(a) simde_vmvn_u8(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit unsigned 16-bit vector (NEON vmvn_u16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vmvn_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_u16(a);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_u16
  #define vmvn_u16(a) simde_vmvn_u16(a)
#endif
|
||||
|
||||
/* Bitwise NOT of every lane, 64-bit unsigned 32-bit vector (NEON vmvn_u32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vmvn_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vmvn_u32(a);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a);

    #if defined(SIMDE_X86_MMX_NATIVE)
      /* cmpeq(a, a) is all-ones; ANDNOT with a yields ~a. */
      r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = ~a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = ~(a_.values[i]);
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vmvn_u32
  #define vmvn_u32(a) simde_vmvn_u32(a)
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_MVN_H) */
|
||||
413
lib/simd_wrapper/simde/arm/neon/neg.h
Normal file
413
lib/simd_wrapper/simde/arm/neon/neg.h
Normal file
@@ -0,0 +1,413 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_NEG_H)
|
||||
#define SIMDE_ARM_NEON_NEG_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vnegd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0))
|
||||
return vnegd_s64(a);
|
||||
#else
|
||||
return -a;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vnegd_s64
|
||||
#define vnegd_s64(a) simde_vnegd_s64(a)
|
||||
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit float32 vector (NEON vneg_f32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vneg_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vneg_f32(a);
  #else
    simde_float32x2_private
      r_,
      a_ = simde_float32x2_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_float32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vneg_f32
  #define vneg_f32(a) simde_vneg_f32(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit float64 vector (NEON vneg_f64, AArch64 only). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vneg_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vneg_f64(a);
  #else
    simde_float64x1_private
      r_,
      a_ = simde_float64x1_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_float64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vneg_f64
  #define vneg_f64(a) simde_vneg_f64(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit signed 8-bit vector (NEON vneg_s8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vneg_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vneg_s8(a);
  #else
    simde_int8x8_private
      r_,
      a_ = simde_int8x8_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* int8 promotes to int before negation, so -INT8_MIN does not overflow. */
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vneg_s8
  #define vneg_s8(a) simde_vneg_s8(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit signed 16-bit vector (NEON vneg_s16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vneg_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vneg_s16(a);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* int16 promotes to int before negation, so -INT16_MIN does not overflow. */
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vneg_s16
  #define vneg_s16(a) simde_vneg_s16(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit signed 32-bit vector (NEON vneg_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vneg_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vneg_s32(a);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a);

    /* SIMDE_BUG_GCC_100762: skip the vector-extension negate on affected GCC. */
    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* NOTE(review): -(a_.values[i]) is signed-overflow UB when the lane is
         * INT32_MIN; NEON wraps. Consider unsigned negation — confirm upstream. */
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vneg_s32
  #define vneg_s32(a) simde_vneg_s32(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 64-bit signed 64-bit vector (NEON vneg_s64, AArch64 only). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vneg_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vneg_s64(a);
  #else
    simde_int64x1_private
      r_,
      a_ = simde_int64x1_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Delegate per lane to the scalar helper. */
        r_.values[i] = simde_vnegd_s64(a_.values[i]);
      }
    #endif

    return simde_int64x1_from_private(r_);
  #endif
}
/* NOTE(review): alias gated on A32V7 aliases although the native path needs
 * A64V8 — confirm against upstream simde. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vneg_s64
  #define vneg_s64(a) simde_vneg_s64(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit float32 vector (NEON vnegq_f32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vnegq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vnegq_f32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f32x4_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* XOR each lane with 0x80000000 to flip the IEEE-754 sign bit. */
      r_.m128 = _mm_castsi128_ps(_mm_xor_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), _mm_castps_si128(a_.m128)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vnegq_f32
  #define vnegq_f32(a) simde_vnegq_f32(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit float64 vector (NEON vnegq_f64, AArch64 only). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vnegq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vnegq_f64(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_f64x2_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* XOR each lane with 0x8000000000000000 to flip the IEEE-754 sign bit. */
      r_.m128d = _mm_castsi128_pd(_mm_xor_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), _mm_castpd_si128(a_.m128d)));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_float64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vnegq_f64
  #define vnegq_f64(a) simde_vnegq_f64(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit signed 8-bit vector (NEON vnegq_s8). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vnegq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vnegq_s8(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i8x16_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* 0 - a gives wrapping two's-complement negation per lane. */
      r_.m128i = _mm_sub_epi8(_mm_setzero_si128(), a_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vnegq_s8
  #define vnegq_s8(a) simde_vnegq_s8(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit signed 16-bit vector (NEON vnegq_s16). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vnegq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vnegq_s16(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i16x8_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* 0 - a gives wrapping two's-complement negation per lane. */
      r_.m128i = _mm_sub_epi16(_mm_setzero_si128(), a_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vnegq_s16
  #define vnegq_s16(a) simde_vnegq_s16(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit signed 32-bit vector (NEON vnegq_s32). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vnegq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vnegq_s32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i32x4_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* 0 - a gives wrapping two's-complement negation per lane. */
      r_.m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* NOTE(review): -(a_.values[i]) is signed-overflow UB when the lane is
         * INT32_MIN; NEON wraps. Consider unsigned negation — confirm upstream. */
        r_.values[i] = -(a_.values[i]);
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vnegq_s32
  #define vnegq_s32(a) simde_vnegq_s32(a)
#endif
|
||||
|
||||
/* Lane-wise negation, 128-bit signed 64-bit vector (NEON vnegq_s64, AArch64 only). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vnegq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vnegq_s64(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
    return vec_neg(a);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a);

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_i64x2_neg(a_.v128);
    #elif defined(SIMDE_X86_SSE2_NATIVE)
      /* 0 - a gives wrapping two's-complement negation per lane. */
      r_.m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.m128i);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = -a_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        /* Delegate per lane to the scalar helper. */
        r_.values[i] = simde_vnegd_s64(a_.values[i]);
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
/* NOTE(review): alias gated on A32V7 aliases although the native path needs
 * A64V8 — confirm against upstream simde. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vnegq_s64
  #define vnegq_s64(a) simde_vnegq_s64(a)
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_NEG_H) */
|
||||
505
lib/simd_wrapper/simde/arm/neon/orn.h
Normal file
505
lib/simd_wrapper/simde/arm/neon/orn.h
Normal file
@@ -0,0 +1,505 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ORN_H)
|
||||
#define SIMDE_ARM_NEON_ORN_H
|
||||
|
||||
#include "orr.h"
|
||||
#include "mvn.h"
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vorn_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_s8
|
||||
#define vorn_s8(a, b) simde_vorn_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vorn_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_s16
|
||||
#define vorn_s16(a, b) simde_vorn_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vorn_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_s32
|
||||
#define vorn_s32(a, b) simde_vorn_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vorn_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_s64
|
||||
#define vorn_s64(a, b) simde_vorn_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vorn_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_u8
|
||||
#define vorn_u8(a, b) simde_vorn_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vorn_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_u16
|
||||
#define vorn_u16(a, b) simde_vorn_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vorn_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_u32
|
||||
#define vorn_u32(a, b) simde_vorn_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vorn_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorn_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorn_u64
|
||||
#define vorn_u64(a, b) simde_vorn_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vornq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_s8
|
||||
#define vornq_s8(a, b) simde_vornq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vornq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_s16
|
||||
#define vornq_s16(a, b) simde_vornq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vornq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_s32
|
||||
#define vornq_s32(a, b) simde_vornq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vornq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_int64x2_private
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_s64
|
||||
#define vornq_s64(a, b) simde_vornq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vornq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_u8
|
||||
#define vornq_u8(a, b) simde_vornq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vornq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_u16
|
||||
#define vornq_u16(a, b) simde_vornq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vornq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_u32
|
||||
#define vornq_u32(a, b) simde_vornq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vornq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vornq_u64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
||||
return vec_orc(a, b);
|
||||
#else
|
||||
simde_uint64x2_private
|
||||
a_ = simde_uint64x2_to_private(a),
|
||||
b_ = simde_uint64x2_to_private(b),
|
||||
r_;
|
||||
|
||||
#if defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | ~(b_.values);
|
||||
#else
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | ~b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vornq_u64
|
||||
#define vornq_u64(a, b) simde_vornq_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ORN_H) */
|
||||
552
lib/simd_wrapper/simde/arm/neon/orr.h
Normal file
552
lib/simd_wrapper/simde/arm/neon/orr.h
Normal file
@@ -0,0 +1,552 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Christopher Moore <moore@free.fr>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ORR_H)
|
||||
#define SIMDE_ARM_NEON_ORR_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vorr_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_s8(a, b);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_s8
|
||||
#define vorr_s8(a, b) simde_vorr_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vorr_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_s16(a, b);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_s16
|
||||
#define vorr_s16(a, b) simde_vorr_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vorr_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_s32(a, b);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a),
|
||||
b_ = simde_int32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_s32
|
||||
#define vorr_s32(a, b) simde_vorr_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vorr_s64(simde_int64x1_t a, simde_int64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_s64(a, b);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a),
|
||||
b_ = simde_int64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_s64
|
||||
#define vorr_s64(a, b) simde_vorr_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vorr_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_u8(a, b);
|
||||
#else
|
||||
simde_uint8x8_private
|
||||
r_,
|
||||
a_ = simde_uint8x8_to_private(a),
|
||||
b_ = simde_uint8x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_u8
|
||||
#define vorr_u8(a, b) simde_vorr_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vorr_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_u16(a, b);
|
||||
#else
|
||||
simde_uint16x4_private
|
||||
r_,
|
||||
a_ = simde_uint16x4_to_private(a),
|
||||
b_ = simde_uint16x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_u16
|
||||
#define vorr_u16(a, b) simde_vorr_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vorr_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_u32(a, b);
|
||||
#else
|
||||
simde_uint32x2_private
|
||||
r_,
|
||||
a_ = simde_uint32x2_to_private(a),
|
||||
b_ = simde_uint32x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_u32
|
||||
#define vorr_u32(a, b) simde_vorr_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x1_t
|
||||
simde_vorr_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorr_u64(a, b);
|
||||
#else
|
||||
simde_uint64x1_private
|
||||
r_,
|
||||
a_ = simde_uint64x1_to_private(a),
|
||||
b_ = simde_uint64x1_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_or_si64(a_.m64, b_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorr_u64
|
||||
#define vorr_u64(a, b) simde_vorr_u64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vorrq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorrq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_or(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorrq_s8
|
||||
#define vorrq_s8(a, b) simde_vorrq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vorrq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorrq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_or(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorrq_s16
|
||||
#define vorrq_s16(a, b) simde_vorrq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vorrq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorrq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_or(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorrq_s32
|
||||
#define vorrq_s32(a, b) simde_vorrq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vorrq_s64(simde_int64x2_t a, simde_int64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorrq_s64(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_or(a, b);
|
||||
#else
|
||||
simde_int64x2_private
|
||||
r_,
|
||||
a_ = simde_int64x2_to_private(a),
|
||||
b_ = simde_int64x2_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorrq_s64
|
||||
#define vorrq_s64(a, b) simde_vorrq_s64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vorrq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vorrq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_or(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_v128_or(a_.v128, b_.v128);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.values = a_.values | b_.values;
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] | b_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vorrq_u8
|
||||
#define vorrq_u8(a, b) simde_vorrq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
/* simde_vorrq_u16: bitwise OR of two vectors of eight uint16 lanes
 * (NEON vorrq_u16 polyfill); same backend dispatch as simde_vorrq_u8. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vorrq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vorrq_u16(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_or(a, b);
  #else
    /* Portable path: unwrap, OR, re-wrap. */
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_or(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values | b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] | b_.values[i];
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vorrq_u16
  #define vorrq_u16(a, b) simde_vorrq_u16((a), (b))
#endif
|
||||
|
||||
/* simde_vorrq_u32: bitwise OR of two vectors of four uint32 lanes
 * (NEON vorrq_u32 polyfill); same backend dispatch as simde_vorrq_u8. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vorrq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vorrq_u32(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_or(a, b);
  #else
    /* Portable path: unwrap, OR, re-wrap. */
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_or(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values | b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] | b_.values[i];
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vorrq_u32
  #define vorrq_u32(a, b) simde_vorrq_u32((a), (b))
#endif
|
||||
|
||||
/* simde_vorrq_u64: bitwise OR of two vectors of two uint64 lanes
 * (NEON vorrq_u64 polyfill).
 * NOTE(review): the AltiVec branch is gated on P7, unlike the narrower
 * lane widths above which use P6 — presumably because 64-bit integer
 * vector support arrived with POWER7; confirm against upstream simde. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vorrq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vorrq_u64(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_or(a, b);
  #else
    /* Portable path: unwrap, OR, re-wrap. */
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i = _mm_or_si128(a_.m128i, b_.m128i);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.v128 = wasm_v128_or(a_.v128, b_.v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values | b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] | b_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vorrq_u64
  #define vorrq_u64(a, b) simde_vorrq_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ORR_H) */
|
||||
211
lib/simd_wrapper/simde/arm/neon/padal.h
Normal file
211
lib/simd_wrapper/simde/arm/neon/padal.h
Normal file
@@ -0,0 +1,211 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_PADAL_H)
|
||||
#define SIMDE_ARM_NEON_PADAL_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "add.h"
|
||||
#include "paddl.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* simde_vpadal_s8: NEON vpadal_s8 polyfill — pairwise add-long and
 * accumulate.  Fallback composes it as a + vpaddl(b): widen adjacent
 * int8 pairs of b into int16 sums, then add them to a. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vpadal_s8(simde_int16x4_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_s8(a, b);
  #else
    return simde_vadd_s16(a, simde_vpaddl_s8(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_s8
  #define vpadal_s8(a, b) simde_vpadal_s8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadal_s16: NEON vpadal_s16 polyfill — a + vpaddl(b) with
 * int16 pairs widened to int32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vpadal_s16(simde_int32x2_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_s16(a, b);
  #else
    return simde_vadd_s32(a, simde_vpaddl_s16(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_s16
  #define vpadal_s16(a, b) simde_vpadal_s16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadal_s32: NEON vpadal_s32 polyfill — a + vpaddl(b) with the
 * int32 pair widened to a single int64 lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vpadal_s32(simde_int64x1_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_s32(a, b);
  #else
    return simde_vadd_s64(a, simde_vpaddl_s32(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_s32
  #define vpadal_s32(a, b) simde_vpadal_s32((a), (b))
#endif
|
||||
|
||||
/* simde_vpadal_u8: unsigned counterpart of simde_vpadal_s8 —
 * a + vpaddl(b), uint8 pairs widened to uint16. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vpadal_u8(simde_uint16x4_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_u8(a, b);
  #else
    return simde_vadd_u16(a, simde_vpaddl_u8(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_u8
  #define vpadal_u8(a, b) simde_vpadal_u8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadal_u16: a + vpaddl(b), uint16 pairs widened to uint32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vpadal_u16(simde_uint32x2_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_u16(a, b);
  #else
    return simde_vadd_u32(a, simde_vpaddl_u16(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_u16
  #define vpadal_u16(a, b) simde_vpadal_u16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadal_u32: a + vpaddl(b), uint32 pair widened to one uint64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vpadal_u32(simde_uint64x1_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadal_u32(a, b);
  #else
    return simde_vadd_u64(a, simde_vpaddl_u32(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadal_u32
  #define vpadal_u32(a, b) simde_vpadal_u32((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_s8: 128-bit (q-register) variant of vpadal_s8 —
 * a + vpaddlq(b), sixteen int8 lanes widened pairwise to eight int16. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vpadalq_s8(simde_int16x8_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_s8(a, b);
  #else
    return simde_vaddq_s16(a, simde_vpaddlq_s8(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_s8
  #define vpadalq_s8(a, b) simde_vpadalq_s8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_s16: a + vpaddlq(b), int16 pairs widened to int32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vpadalq_s16(simde_int32x4_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_s16(a, b);
  #else
    return simde_vaddq_s32(a, simde_vpaddlq_s16(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_s16
  #define vpadalq_s16(a, b) simde_vpadalq_s16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_s32: a + vpaddlq(b), int32 pairs widened to int64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vpadalq_s32(simde_int64x2_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_s32(a, b);
  #else
    return simde_vaddq_s64(a, simde_vpaddlq_s32(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_s32
  #define vpadalq_s32(a, b) simde_vpadalq_s32((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_u8: a + vpaddlq(b), uint8 pairs widened to uint16. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vpadalq_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_u8(a, b);
  #else
    return simde_vaddq_u16(a, simde_vpaddlq_u8(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_u8
  #define vpadalq_u8(a, b) simde_vpadalq_u8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_u16: a + vpaddlq(b), uint16 pairs widened to uint32. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vpadalq_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_u16(a, b);
  #else
    return simde_vaddq_u32(a, simde_vpaddlq_u16(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_u16
  #define vpadalq_u16(a, b) simde_vpadalq_u16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadalq_u32: a + vpaddlq(b), uint32 pairs widened to uint64. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vpadalq_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadalq_u32(a, b);
  #else
    return simde_vaddq_u64(a, simde_vpaddlq_u32(b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadalq_u32
  #define vpadalq_u32(a, b) simde_vpadalq_u32((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* SIMDE_ARM_NEON_PADAL_H */
|
||||
388
lib/simd_wrapper/simde/arm/neon/padd.h
Normal file
388
lib/simd_wrapper/simde/arm/neon/padd.h
Normal file
@@ -0,0 +1,388 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020-2021 Evan Nemerson <evan@nemerson.com>
|
||||
* 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_PADD_H)
|
||||
#define SIMDE_ARM_NEON_PADD_H
|
||||
|
||||
#include "add.h"
|
||||
#include "uzp1.h"
|
||||
#include "uzp2.h"
|
||||
#include "types.h"
|
||||
#include "get_lane.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
/* simde_vpaddd_s64: scalar pairwise add — returns lane 0 + lane 1 of a
 * (polyfill for the AArch64-only vpaddd_s64). */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vpaddd_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddd_s64(a);
  #else
    return simde_vaddd_s64(simde_vgetq_lane_s64(a, 0), simde_vgetq_lane_s64(a, 1));
  #endif
}
/* Shadow the native name when A64 native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vpaddd_s64
  #define vpaddd_s64(a) simde_vpaddd_s64((a))
#endif
|
||||
|
||||
/* simde_vpaddd_u64: scalar pairwise add — lane 0 + lane 1 of a
 * (AArch64 vpaddd_u64 polyfill). */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vpaddd_u64(simde_uint64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddd_u64(a);
  #else
    return simde_vaddd_u64(simde_vgetq_lane_u64(a, 0), simde_vgetq_lane_u64(a, 1));
  #endif
}
/* Shadow the native name when A64 native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vpaddd_u64
  #define vpaddd_u64(a) simde_vpaddd_u64((a))
#endif
|
||||
|
||||
/* simde_vpaddd_f64: scalar pairwise add of the two float64 lanes of a
 * (AArch64 vpaddd_f64 polyfill). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vpaddd_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddd_f64(a);
  #else
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    return a_.values[0] + a_.values[1];
  #endif
}
/* Shadow the native name when A64 native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vpaddd_f64
  #define vpaddd_f64(a) simde_vpaddd_f64((a))
#endif
|
||||
|
||||
/* simde_vpadds_f32: scalar pairwise add of the two float32 lanes of a
 * (AArch64 vpadds_f32 polyfill). */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vpadds_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpadds_f32(a);
  #else
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    return a_.values[0] + a_.values[1];
  #endif
}
/* Shadow the native name when A64 native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vpadds_f32
  #define vpadds_f32(a) simde_vpadds_f32((a))
#endif
|
||||
|
||||
/* simde_vpadd_f32: pairwise add across the concatenation of a and b
 * (NEON vpadd_f32 polyfill).  Fallback: vuzp1/vuzp2 split the lanes
 * into the two de-interleaved halves, whose lane-wise sum yields the
 * pairwise sums of a followed by b.
 * NOTE(review): the native path is additionally disabled for clang < 9
 * — presumably a compiler-bug workaround; confirm against upstream. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vpadd_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0)
    return vpadd_f32(a, b);
  #else
    return simde_vadd_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_f32
  #define vpadd_f32(a, b) simde_vpadd_f32((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_s8: pairwise add of int8 lanes across a then b
 * (NEON vpadd_s8 polyfill), composed as vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vpadd_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_s8(a, b);
  #else
    return simde_vadd_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_s8
  #define vpadd_s8(a, b) simde_vpadd_s8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_s16: pairwise add of int16 lanes across a then b.
 * On x86 with SSSE3 + MMX this maps directly to the horizontal add
 * _mm_hadd_pi16; otherwise vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vpadd_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_s16(a, b);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return simde_int16x4_from_m64(_mm_hadd_pi16(simde_int16x4_to_m64(a), simde_int16x4_to_m64(b)));
  #else
    return simde_vadd_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_s16
  #define vpadd_s16(a, b) simde_vpadd_s16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_s32: pairwise add of int32 lanes across a then b.
 * Maps to _mm_hadd_pi32 on SSSE3 + MMX; otherwise vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vpadd_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_s32(a, b);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return simde_int32x2_from_m64(_mm_hadd_pi32(simde_int32x2_to_m64(a), simde_int32x2_to_m64(b)));
  #else
    return simde_vadd_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_s32
  #define vpadd_s32(a, b) simde_vpadd_s32((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_u8: pairwise add of uint8 lanes across a then b,
 * composed as vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vpadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_u8(a, b);
  #else
    return simde_vadd_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_u8
  #define vpadd_u8(a, b) simde_vpadd_u8((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_u16: pairwise add of uint16 lanes across a then b,
 * composed as vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vpadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_u16(a, b);
  #else
    return simde_vadd_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_u16
  #define vpadd_u16(a, b) simde_vpadd_u16((a), (b))
#endif
|
||||
|
||||
/* simde_vpadd_u32: pairwise add of uint32 lanes across a then b,
 * composed as vuzp1 + vuzp2. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vpadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vpadd_u32(a, b);
  #else
    return simde_vadd_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpadd_u32
  #define vpadd_u32(a, b) simde_vpadd_u32((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_f32: 128-bit pairwise add of float32 lanes across a
 * then b (AArch64 vpaddq_f32 polyfill); maps to SSE3 _mm_hadd_ps when
 * available. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vpaddq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_f32(a, b);
  #elif defined(SIMDE_X86_SSE3_NATIVE)
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);

    /* Inner guard repeats the outer SSE3 test; redundant but harmless —
     * it mirrors the generated multi-backend pattern used elsewhere. */
    #if defined(SIMDE_X86_SSE3_NATIVE)
      r_.m128 = _mm_hadd_ps(a_.m128, b_.m128);
    #endif

    return simde_float32x4_from_private(r_);
  #else
    return simde_vaddq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b));
  #endif
}
/* NOTE(review): alias gated on the A32V7 alias flag although vpaddq_f32
 * is an A64 intrinsic — consistent with the other vpaddq_* aliases
 * below; confirm against upstream simde. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_f32
  #define vpaddq_f32(a, b) simde_vpaddq_f32((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_f64: 128-bit pairwise add of float64 lanes across a
 * then b; maps to SSE3 _mm_hadd_pd when available. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vpaddq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_f64(a, b);
  #elif defined(SIMDE_X86_SSE3_NATIVE)
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);

    /* Inner guard repeats the outer SSE3 test; redundant but harmless. */
    #if defined(SIMDE_X86_SSE3_NATIVE)
      r_.m128d = _mm_hadd_pd(a_.m128d, b_.m128d);
    #endif

    return simde_float64x2_from_private(r_);
  #else
    return simde_vaddq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b));
  #endif
}
/* Shadow the native name when A64 native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_f64
  #define vpaddq_f64(a, b) simde_vpaddq_f64((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_s8: 128-bit pairwise add of int8 lanes across a then b,
 * composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vpaddq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_s8(a, b);
  #else
    return simde_vaddq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_s8
  #define vpaddq_s8(a, b) simde_vpaddq_s8((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_s16: 128-bit pairwise add of int16 lanes across a then
 * b; maps to SSSE3 _mm_hadd_epi16 when available. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vpaddq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_s16(a, b);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    /* Inner guard repeats the outer SSSE3 test; redundant but harmless. */
    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i = _mm_hadd_epi16(a_.m128i, b_.m128i);
    #endif

    return simde_int16x8_from_private(r_);
  #else
    return simde_vaddq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_s16
  #define vpaddq_s16(a, b) simde_vpaddq_s16((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_s32: 128-bit pairwise add of int32 lanes across a then
 * b; maps to SSSE3 _mm_hadd_epi32 when available. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vpaddq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_s32(a, b);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    /* Inner guard repeats the outer SSSE3 test; redundant but harmless. */
    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i = _mm_hadd_epi32(a_.m128i, b_.m128i);
    #endif

    return simde_int32x4_from_private(r_);
  #else
    return simde_vaddq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_s32
  #define vpaddq_s32(a, b) simde_vpaddq_s32((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_s64: 128-bit pairwise add of int64 lanes across a then
 * b, composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vpaddq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_s64(a, b);
  #else
    return simde_vaddq_s64(simde_vuzp1q_s64(a, b), simde_vuzp2q_s64(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_s64
  #define vpaddq_s64(a, b) simde_vpaddq_s64((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_u8: 128-bit pairwise add of uint8 lanes across a then
 * b, composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vpaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_u8(a, b);
  #else
    return simde_vaddq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_u8
  #define vpaddq_u8(a, b) simde_vpaddq_u8((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_u16: 128-bit pairwise add of uint16 lanes across a then
 * b, composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vpaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_u16(a, b);
  #else
    return simde_vaddq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_u16
  #define vpaddq_u16(a, b) simde_vpaddq_u16((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_u32: 128-bit pairwise add of uint32 lanes across a then
 * b, composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vpaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_u32(a, b);
  #else
    return simde_vaddq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_u32
  #define vpaddq_u32(a, b) simde_vpaddq_u32((a), (b))
#endif
|
||||
|
||||
/* simde_vpaddq_u64: 128-bit pairwise add of uint64 lanes across a then
 * b, composed as vuzp1q + vuzp2q. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vpaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vpaddq_u64(a, b);
  #else
    return simde_vaddq_u64(simde_vuzp1q_u64(a, b), simde_vuzp2q_u64(a, b));
  #endif
}
/* Shadow the native name when native-alias mode is enabled. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vpaddq_u64
  #define vpaddq_u64(a, b) simde_vpaddq_u64((a), (b))
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_PADD_H) */
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user