Add simde
This commit is contained in:
parent
0f2b3da37e
commit
383bd93261
159
lib/simd_wrapper/simd_wrapper.h
Normal file
159
lib/simd_wrapper/simd_wrapper.h
Normal file
@ -0,0 +1,159 @@
|
||||
/* ==========================================================================
|
||||
* Copyright (c) 2022 SuperTuxKart-Team
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to permit
|
||||
* persons to whom the Software is furnished to do so, subject to the
|
||||
* following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
||||
* NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* ==========================================================================
|
||||
*/
|
||||
#ifndef HEADER_SIMD_WRAPPER_HPP
|
||||
#define HEADER_SIMD_WRAPPER_HPP
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <simde/simde-arch.h>
|
||||
#if defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_X86)
|
||||
// Native SSE
|
||||
#if __MMX__ || CPU_ENABLE_MMX
|
||||
#include <mmintrin.h>
|
||||
#define CPU_MMX_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 1 ) ) || CPU_ENABLE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#define CPU_SSE_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE2__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 2 ) ) || CPU_ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define CPU_SSE2_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#define CPU_SSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#define CPU_SSSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#define CPU_SSE4_1_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_2__ || CPU_ENABLE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#define CPU_SSE4_2_SUPPORT (1)
|
||||
#endif
|
||||
|
||||
#elif defined(SIMDE_ARCH_ARM_NEON)
|
||||
// For now we only enable the compile-time SSE* to NEON translation, because
// it is easy to test.
// Only enable up to SSE4.2: later extensions (starting from AVX) have few
// native conversions and would use the slower C99 fallback.
|
||||
#define CPU_MMX_SUPPORT (1)
|
||||
#define CPU_SSE_SUPPORT (1)
|
||||
#define CPU_SSE2_SUPPORT (1)
|
||||
#define CPU_SSE3_SUPPORT (1)
|
||||
#define CPU_SSSE3_SUPPORT (1)
|
||||
#define CPU_SSE4_1_SUPPORT (1)
|
||||
#define CPU_SSE4_2_SUPPORT (1)
|
||||
|
||||
#if defined(_MSC_VER) && defined(__cplusplus)
|
||||
// Fix math related functions missing in msvc
|
||||
#include <cmath>
|
||||
#endif
|
||||
|
||||
#define SIMDE_ENABLE_NATIVE_ALIASES
|
||||
#include "simde/x86/sse4.2.h"
|
||||
#endif
|
||||
|
||||
// Fallbacks for the rounding-control names. Each one is only defined when an
// earlier header has not already provided it as a macro, and forwards to the
// SIMDE_-prefixed name of the same entity.
#ifndef _MM_FROUND_TO_NEG_INF
#define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
#endif

#ifndef _MM_FROUND_NO_EXC
#define _MM_FROUND_NO_EXC SIMDE_MM_FROUND_NO_EXC
#endif

// This used to expand to itself, which left the name unresolved whenever the
// fallback was actually taken; forward to the SIMDE_ name like the others.
#ifndef _MM_SET_ROUNDING_MODE
#define _MM_SET_ROUNDING_MODE SIMDE_MM_SET_ROUNDING_MODE
#endif

#ifndef _MM_ROUND_NEAREST
#define _MM_ROUND_NEAREST SIMDE_MM_ROUND_NEAREST
#endif

#ifndef _MM_ROUND_UP
#define _MM_ROUND_UP SIMDE_MM_ROUND_UP
#endif

#ifndef _MM_ROUND_DOWN
#define _MM_ROUND_DOWN SIMDE_MM_ROUND_DOWN
#endif
|
||||
|
||||
// Utilities for aligned allocation
|
||||
// Allocates `bytes` bytes whose address is a multiple of `alignment`.
//
// alignment: required alignment in bytes; any non-zero value is accepted.
// bytes:     number of usable bytes requested.
//
// Returns the aligned pointer, or NULL when `alignment` is zero, when the
// total size would overflow size_t, or when malloc fails. The result must be
// released with simd_aligned_free(), never with free().
//
// Layout: [malloc'd start ... | size_t book-keeping | aligned user data].
// The book-keeping slot directly in front of the returned pointer holds the
// address returned by malloc.
static inline void* simd_aligned_alloc(size_t alignment, size_t bytes)
{
    const size_t header = sizeof(size_t);

    // a zero alignment would make the modulo below divide by zero.
    if (alignment == 0)
        return NULL;

    // refuse requests whose total size cannot be represented instead of
    // letting the sum wrap around and under-allocate.
    if (alignment > SIZE_MAX - header ||
        bytes > SIZE_MAX - header - alignment)
        return NULL;

    // the aligned pointer lies at most `header + alignment` bytes past the
    // start of the malloc'd region, so this is exactly enough storage.
    const size_t total_size = bytes + alignment + header;

    char* const data_start = (char*)malloc(total_size);
    if (!data_start)
        return NULL;

    // skip the book-keeping, then step forward to the next aligned address
    // (a full `alignment` step when the address is already aligned).
    char* data = data_start + header;
    const size_t offset = alignment - (size_t)(((uintptr_t)data) % alignment);
    data += offset;

    // write the book-keeping. The slot is not necessarily aligned for size_t
    // when `alignment` is small, so copy the bytes rather than storing
    // through a size_t pointer.
    const size_t start_address = (size_t)(uintptr_t)data_start;
    memcpy(data - header, &start_address, sizeof(start_address));

    return data;
}
|
||||
|
||||
// Releases memory obtained from simd_aligned_alloc().
//
// raw_data: pointer previously returned by simd_aligned_alloc(), or NULL
//           (in which case nothing happens).
//
// The sizeof(size_t) bytes directly in front of `raw_data` are assumed to be
// the book-keeping slot holding the address that has to be passed to free().
static inline void simd_aligned_free(void* raw_data)
{
    if (!raw_data)
        return;

    // the slot is not guaranteed to be aligned for size_t, so copy the bytes
    // out instead of dereferencing a size_t pointer.
    size_t start_address;
    memcpy(&start_address, (const char*)raw_data - sizeof(size_t),
           sizeof(start_address));

    // free the original malloc'd region.
    free((void*)(uintptr_t)start_address);
}
|
||||
|
||||
#endif
|
20
lib/simd_wrapper/simde/COPYING
Normal file
20
lib/simd_wrapper/simde/COPYING
Normal file
@ -0,0 +1,20 @@
|
||||
Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
10
lib/simd_wrapper/simde/README.md
Normal file
10
lib/simd_wrapper/simde/README.md
Normal file
@ -0,0 +1,10 @@
|
||||
# SIMDe Without Test Cases
|
||||
|
||||
This repository contains only the core of
|
||||
[SIMDe](https://github.com/simd-everywhere/simde).
|
||||
It is generated automatically for every commit to master, and is
|
||||
intended to be used as a submodule in projects which don't want to
|
||||
include the (rather large) test cases.
|
||||
|
||||
All development work happens in the main repository, please do not
|
||||
file issues or create pull requests against this repository.
|
210
lib/simd_wrapper/simde/arm/neon.h
Normal file
210
lib/simd_wrapper/simde/arm/neon.h
Normal file
@ -0,0 +1,210 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_H)
|
||||
#define SIMDE_ARM_NEON_H
|
||||
|
||||
#include "neon/types.h"
|
||||
|
||||
#include "neon/aba.h"
|
||||
#include "neon/abd.h"
|
||||
#include "neon/abdl.h"
|
||||
#include "neon/abs.h"
|
||||
#include "neon/add.h"
|
||||
#include "neon/addhn.h"
|
||||
#include "neon/addl.h"
|
||||
#include "neon/addlv.h"
|
||||
#include "neon/addl_high.h"
|
||||
#include "neon/addv.h"
|
||||
#include "neon/addw.h"
|
||||
#include "neon/addw_high.h"
|
||||
#include "neon/and.h"
|
||||
#include "neon/bcax.h"
|
||||
#include "neon/bic.h"
|
||||
#include "neon/bsl.h"
|
||||
#include "neon/cage.h"
|
||||
#include "neon/cagt.h"
|
||||
#include "neon/ceq.h"
|
||||
#include "neon/ceqz.h"
|
||||
#include "neon/cge.h"
|
||||
#include "neon/cgez.h"
|
||||
#include "neon/cgt.h"
|
||||
#include "neon/cgtz.h"
|
||||
#include "neon/cle.h"
|
||||
#include "neon/clez.h"
|
||||
#include "neon/cls.h"
|
||||
#include "neon/clt.h"
|
||||
#include "neon/cltz.h"
|
||||
#include "neon/clz.h"
|
||||
#include "neon/cmla.h"
|
||||
#include "neon/cmla_rot90.h"
|
||||
#include "neon/cmla_rot180.h"
|
||||
#include "neon/cmla_rot270.h"
|
||||
#include "neon/cnt.h"
|
||||
#include "neon/cvt.h"
|
||||
#include "neon/combine.h"
|
||||
#include "neon/create.h"
|
||||
#include "neon/dot.h"
|
||||
#include "neon/dot_lane.h"
|
||||
#include "neon/dup_lane.h"
|
||||
#include "neon/dup_n.h"
|
||||
#include "neon/eor.h"
|
||||
#include "neon/ext.h"
|
||||
#include "neon/fma.h"
|
||||
#include "neon/fma_lane.h"
|
||||
#include "neon/fma_n.h"
|
||||
#include "neon/get_high.h"
|
||||
#include "neon/get_lane.h"
|
||||
#include "neon/get_low.h"
|
||||
#include "neon/hadd.h"
|
||||
#include "neon/hsub.h"
|
||||
#include "neon/ld1.h"
|
||||
#include "neon/ld1_dup.h"
|
||||
#include "neon/ld1_lane.h"
|
||||
#include "neon/ld2.h"
|
||||
#include "neon/ld3.h"
|
||||
#include "neon/ld4.h"
|
||||
#include "neon/ld4_lane.h"
|
||||
#include "neon/max.h"
|
||||
#include "neon/maxnm.h"
|
||||
#include "neon/maxv.h"
|
||||
#include "neon/min.h"
|
||||
#include "neon/minnm.h"
|
||||
#include "neon/minv.h"
|
||||
#include "neon/mla.h"
|
||||
#include "neon/mla_n.h"
|
||||
#include "neon/mlal.h"
|
||||
#include "neon/mlal_high.h"
|
||||
#include "neon/mlal_high_n.h"
|
||||
#include "neon/mlal_lane.h"
|
||||
#include "neon/mlal_n.h"
|
||||
#include "neon/mls.h"
|
||||
#include "neon/mls_n.h"
|
||||
#include "neon/mlsl.h"
|
||||
#include "neon/mlsl_high.h"
|
||||
#include "neon/mlsl_high_n.h"
|
||||
#include "neon/mlsl_lane.h"
|
||||
#include "neon/mlsl_n.h"
|
||||
#include "neon/movl.h"
|
||||
#include "neon/movl_high.h"
|
||||
#include "neon/movn.h"
|
||||
#include "neon/movn_high.h"
|
||||
#include "neon/mul.h"
|
||||
#include "neon/mul_lane.h"
|
||||
#include "neon/mul_n.h"
|
||||
#include "neon/mull.h"
|
||||
#include "neon/mull_high.h"
|
||||
#include "neon/mull_lane.h"
|
||||
#include "neon/mull_n.h"
|
||||
#include "neon/mvn.h"
|
||||
#include "neon/neg.h"
|
||||
#include "neon/orn.h"
|
||||
#include "neon/orr.h"
|
||||
#include "neon/padal.h"
|
||||
#include "neon/padd.h"
|
||||
#include "neon/paddl.h"
|
||||
#include "neon/pmax.h"
|
||||
#include "neon/pmin.h"
|
||||
#include "neon/qabs.h"
|
||||
#include "neon/qadd.h"
|
||||
#include "neon/qdmulh.h"
|
||||
#include "neon/qdmulh_lane.h"
|
||||
#include "neon/qdmulh_n.h"
|
||||
#include "neon/qdmull.h"
|
||||
#include "neon/qrdmulh.h"
|
||||
#include "neon/qrdmulh_lane.h"
|
||||
#include "neon/qrdmulh_n.h"
|
||||
#include "neon/qrshrn_n.h"
|
||||
#include "neon/qrshrun_n.h"
|
||||
#include "neon/qmovn.h"
|
||||
#include "neon/qmovun.h"
|
||||
#include "neon/qmovn_high.h"
|
||||
#include "neon/qneg.h"
|
||||
#include "neon/qsub.h"
|
||||
#include "neon/qshl.h"
|
||||
#include "neon/qshlu_n.h"
|
||||
#include "neon/qshrn_n.h"
|
||||
#include "neon/qshrun_n.h"
|
||||
#include "neon/qtbl.h"
|
||||
#include "neon/qtbx.h"
|
||||
#include "neon/rbit.h"
|
||||
#include "neon/recpe.h"
|
||||
#include "neon/recps.h"
|
||||
#include "neon/reinterpret.h"
|
||||
#include "neon/rev16.h"
|
||||
#include "neon/rev32.h"
|
||||
#include "neon/rev64.h"
|
||||
#include "neon/rhadd.h"
|
||||
#include "neon/rnd.h"
|
||||
#include "neon/rndm.h"
|
||||
#include "neon/rndi.h"
|
||||
#include "neon/rndn.h"
|
||||
#include "neon/rndp.h"
|
||||
#include "neon/rshl.h"
|
||||
#include "neon/rshr_n.h"
|
||||
#include "neon/rshrn_n.h"
|
||||
#include "neon/rsqrte.h"
|
||||
#include "neon/rsqrts.h"
|
||||
#include "neon/rsra_n.h"
|
||||
#include "neon/set_lane.h"
|
||||
#include "neon/shl.h"
|
||||
#include "neon/shl_n.h"
|
||||
#include "neon/shll_n.h"
|
||||
#include "neon/shr_n.h"
|
||||
#include "neon/shrn_n.h"
|
||||
#include "neon/sqadd.h"
|
||||
#include "neon/sra_n.h"
|
||||
#include "neon/sri_n.h"
|
||||
#include "neon/st1.h"
|
||||
#include "neon/st1_lane.h"
|
||||
#include "neon/st2.h"
|
||||
#include "neon/st2_lane.h"
|
||||
#include "neon/st3.h"
|
||||
#include "neon/st3_lane.h"
|
||||
#include "neon/st4.h"
|
||||
#include "neon/st4_lane.h"
|
||||
#include "neon/sub.h"
|
||||
#include "neon/subhn.h"
|
||||
#include "neon/subl.h"
|
||||
#include "neon/subl_high.h"
|
||||
#include "neon/subw.h"
|
||||
#include "neon/subw_high.h"
|
||||
#include "neon/tbl.h"
|
||||
#include "neon/tbx.h"
|
||||
#include "neon/trn.h"
|
||||
#include "neon/trn1.h"
|
||||
#include "neon/trn2.h"
|
||||
#include "neon/tst.h"
|
||||
#include "neon/uqadd.h"
|
||||
#include "neon/uzp.h"
|
||||
#include "neon/uzp1.h"
|
||||
#include "neon/uzp2.h"
|
||||
#include "neon/xar.h"
|
||||
#include "neon/zip.h"
|
||||
#include "neon/zip1.h"
|
||||
#include "neon/zip2.h"
|
||||
|
||||
#endif /* SIMDE_ARM_NEON_H */
|
208
lib/simd_wrapper/simde/arm/neon/aba.h
Normal file
208
lib/simd_wrapper/simde/arm/neon/aba.h
Normal file
@ -0,0 +1,208 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABA_H)
|
||||
#define SIMDE_ARM_NEON_ABA_H
|
||||
|
||||
#include "abd.h"
|
||||
#include "add.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s8(simde_vabd_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s8
|
||||
#define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s16(simde_vabd_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s16
|
||||
#define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_s32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_s32(simde_vabd_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_s32
|
||||
#define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u8(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u8(simde_vabd_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u8
|
||||
#define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u16(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u16(simde_vabd_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u16
|
||||
#define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vaba_u32(a, b, c);
|
||||
#else
|
||||
return simde_vadd_u32(simde_vabd_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vaba_u32
|
||||
#define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s8(simde_vabdq_s8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s8
|
||||
#define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s16(simde_vabdq_s16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s16
|
||||
#define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_s32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_s32(simde_vabdq_s32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_s32
|
||||
#define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u8(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u8(simde_vabdq_u8(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u8
|
||||
#define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vabaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u16(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u16(simde_vabdq_u16(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u16
|
||||
#define vabaq_u16(a, b, c) simde_vabaq_u16((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vabaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabaq_u32(a, b, c);
|
||||
#else
|
||||
return simde_vaddq_u32(simde_vabdq_u32(b, c), a);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabaq_u32
|
||||
#define vabaq_u32(a, b, c) simde_vabaq_u32((a), (b), (c))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABA_H) */
|
489
lib/simd_wrapper/simde/arm/neon/abd.h
Normal file
489
lib/simd_wrapper/simde/arm/neon/abd.h
Normal file
@ -0,0 +1,489 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABD_H)
|
||||
#define SIMDE_ARM_NEON_ABD_H
|
||||
|
||||
#include "abs.h"
|
||||
#include "subl.h"
|
||||
#include "movn.h"
|
||||
#include "movl.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32_t
|
||||
simde_vabds_f32(simde_float32_t a, simde_float32_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabds_f32(a, b);
|
||||
#else
|
||||
simde_float32_t r = a - b;
|
||||
return r < 0 ? -r : r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabds_f32
|
||||
#define vabds_f32(a, b) simde_vabds_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64_t
|
||||
simde_vabdd_f64(simde_float64_t a, simde_float64_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabdd_f64(a, b);
|
||||
#else
|
||||
simde_float64_t r = a - b;
|
||||
return r < 0 ? -r : r;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdd_f64
|
||||
#define vabdd_f64(a, b) simde_vabdd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_f32(a, b);
|
||||
#else
|
||||
return simde_vabs_f32(simde_vsub_f32(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_f32
|
||||
#define vabd_f32(a, b) simde_vabd_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabd_f64(a, b);
|
||||
#else
|
||||
return simde_vabs_f64(simde_vsub_f64(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_f64
|
||||
#define vabd_f64(a, b) simde_vabd_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s8(a, b);
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE)
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a),
|
||||
b_ = simde_int8x8_to_private(b);
|
||||
|
||||
const __m64 m = _mm_cmpgt_pi8(b_.m64, a_.m64);
|
||||
r_.m64 =
|
||||
_mm_xor_si64(
|
||||
_mm_add_pi8(
|
||||
_mm_sub_pi8(a_.m64, b_.m64),
|
||||
m
|
||||
),
|
||||
m
|
||||
);
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s8
|
||||
#define vabd_s8(a, b) simde_vabd_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s16(a, b);
|
||||
#elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a),
|
||||
b_ = simde_int16x4_to_private(b);
|
||||
|
||||
r_.m64 = _mm_sub_pi16(_mm_max_pi16(a_.m64, b_.m64), _mm_min_pi16(a_.m64, b_.m64));
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#else
|
||||
return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s16
|
||||
#define vabd_s16(a, b) simde_vabd_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_s32(a, b);
|
||||
#else
|
||||
return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b)));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_s32
|
||||
#define vabd_s32(a, b) simde_vabd_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x8_t
|
||||
simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u8(a, b);
|
||||
#else
|
||||
return simde_vmovn_u16(
|
||||
simde_vreinterpretq_u16_s16(
|
||||
simde_vabsq_s16(
|
||||
simde_vsubq_s16(
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u8
|
||||
#define vabd_u8(a, b) simde_vabd_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x4_t
|
||||
simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u16(a, b);
|
||||
#else
|
||||
return simde_vmovn_u32(
|
||||
simde_vreinterpretq_u32_s32(
|
||||
simde_vabsq_s32(
|
||||
simde_vsubq_s32(
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u16
|
||||
#define vabd_u16(a, b) simde_vabd_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x2_t
|
||||
simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabd_u32(a, b);
|
||||
#else
|
||||
return simde_vmovn_u64(
|
||||
simde_vreinterpretq_u64_s64(
|
||||
simde_vabsq_s64(
|
||||
simde_vsubq_s64(
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))))));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabd_u32
|
||||
#define vabd_u32(a, b) simde_vabd_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_f32(a, b);
|
||||
#else
|
||||
return simde_vabsq_f32(simde_vsubq_f32(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_f32
|
||||
#define vabdq_f32(a, b) simde_vabdq_f32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabdq_f64(a, b);
|
||||
#else
|
||||
return simde_vabsq_f64(simde_vsubq_f64(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_f64
|
||||
#define vabdq_f64(a, b) simde_vabdq_f64((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x16_t
|
||||
simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_s8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_int8x16_private
|
||||
r_,
|
||||
a_ = simde_int8x16_to_private(a),
|
||||
b_ = simde_int8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128i = _mm_sub_epi8(_mm_max_epi8(a_.m128i, b_.m128i), _mm_min_epi8(a_.m128i, b_.m128i));
|
||||
#elif defined(SIMDE_X86_SSE2_NATIVE)
|
||||
const __m128i m = _mm_cmpgt_epi8(b_.m128i, a_.m128i);
|
||||
r_.m128i =
|
||||
_mm_xor_si128(
|
||||
_mm_add_epi8(
|
||||
_mm_sub_epi8(a_.m128i, b_.m128i),
|
||||
m
|
||||
),
|
||||
m
|
||||
);
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_sub(wasm_i8x16_max(a_.v128, b_.v128), wasm_i8x16_min(a_.v128, b_.v128));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_s8
|
||||
#define vabdq_s8(a, b) simde_vabdq_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_s16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_int16x8_private
|
||||
r_,
|
||||
a_ = simde_int16x8_to_private(a),
|
||||
b_ = simde_int16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
/* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881658604 */
|
||||
r_.m128i = _mm_sub_epi16(_mm_max_epi16(a_.m128i, b_.m128i), _mm_min_epi16(a_.m128i, b_.m128i));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_sub(wasm_i16x8_max(a_.v128, b_.v128), wasm_i16x8_min(a_.v128, b_.v128));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] =
|
||||
(a_.values[i] < b_.values[i]) ?
|
||||
(b_.values[i] - a_.values[i]) :
|
||||
(a_.values[i] - b_.values[i]);
|
||||
}
|
||||
|
||||
#endif
|
||||
return simde_int16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_s16
|
||||
#define vabdq_s16(a, b) simde_vabdq_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_s32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_int32x4_private
|
||||
r_,
|
||||
a_ = simde_int32x4_to_private(a),
|
||||
b_ = simde_int32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
r_.m128i = _mm_sub_epi32(_mm_max_epi32(a_.m128i, b_.m128i), _mm_min_epi32(a_.m128i, b_.m128i));
|
||||
#elif defined(SIMDE_X86_SSE2_NATIVE)
|
||||
const __m128i m = _mm_cmpgt_epi32(b_.m128i, a_.m128i);
|
||||
r_.m128i =
|
||||
_mm_xor_si128(
|
||||
_mm_add_epi32(
|
||||
_mm_sub_epi32(a_.m128i, b_.m128i),
|
||||
m
|
||||
),
|
||||
m
|
||||
);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? -tmp : tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_s32
|
||||
#define vabdq_s32(a, b) simde_vabdq_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint8x16_t
|
||||
simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_u8(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
|
||||
return vec_absd(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_uint8x16_private
|
||||
r_,
|
||||
a_ = simde_uint8x16_to_private(a),
|
||||
b_ = simde_uint8x16_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
r_.m128i = _mm_sub_epi8(_mm_max_epu8(a_.m128i, b_.m128i), _mm_min_epu8(a_.m128i, b_.m128i));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i8x16_sub(wasm_u8x16_max(a_.v128, b_.v128), wasm_u8x16_min(a_.v128, b_.v128));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]);
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? -tmp : tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint8x16_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_u8
|
||||
#define vabdq_u8(a, b) simde_vabdq_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_u16(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
|
||||
return vec_absd(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_uint16x8_private
|
||||
r_,
|
||||
a_ = simde_uint16x8_to_private(a),
|
||||
b_ = simde_uint16x8_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
r_.m128i = _mm_sub_epi16(_mm_max_epu16(a_.m128i, b_.m128i), _mm_min_epu16(a_.m128i, b_.m128i));
|
||||
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_i16x8_sub(wasm_u16x8_max(a_.v128, b_.v128), wasm_u16x8_min(a_.v128, b_.v128));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]);
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? -tmp : tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint16x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_u16
|
||||
#define vabdq_u16(a, b) simde_vabdq_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdq_u32(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
|
||||
return vec_absd(a, b);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_sub(vec_max(a, b), vec_min(a, b));
|
||||
#elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
|
||||
return vec_max(a, b) - vec_min(a, b);
|
||||
#else
|
||||
simde_uint32x4_private
|
||||
r_,
|
||||
a_ = simde_uint32x4_to_private(a),
|
||||
b_ = simde_uint32x4_to_private(b);
|
||||
|
||||
#if defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
r_.m128i = _mm_sub_epi32(_mm_max_epu32(a_.m128i, b_.m128i), _mm_min_epu32(a_.m128i, b_.m128i));
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]);
|
||||
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? -tmp : tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_uint32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdq_u32
|
||||
#define vabdq_u32(a, b) simde_vabdq_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABD_H) */
|
147
lib/simd_wrapper/simde/arm/neon/abdl.h
Normal file
147
lib/simd_wrapper/simde/arm/neon/abdl.h
Normal file
@ -0,0 +1,147 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABDL_H)
|
||||
#define SIMDE_ARM_NEON_ABDL_H
|
||||
|
||||
#include "abs.h"
|
||||
#include "subl.h"
|
||||
#include "movl.h"
|
||||
#include "reinterpret.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x8_t
|
||||
simde_vabdl_s8(simde_int8x8_t a, simde_int8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_s8(a, b);
|
||||
#else
|
||||
return simde_vabsq_s16(simde_vsubl_s8(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_s8
|
||||
#define vabdl_s8(a, b) simde_vabdl_s8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x4_t
|
||||
simde_vabdl_s16(simde_int16x4_t a, simde_int16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_s16(a, b);
|
||||
#else
|
||||
return simde_vabsq_s32(simde_vsubl_s16(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_s16
|
||||
#define vabdl_s16(a, b) simde_vabdl_s16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x2_t
|
||||
simde_vabdl_s32(simde_int32x2_t a, simde_int32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_s32(a, b);
|
||||
#else
|
||||
return simde_vabsq_s64(simde_vsubl_s32(a, b));
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_s32
|
||||
#define vabdl_s32(a, b) simde_vabdl_s32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint16x8_t
|
||||
simde_vabdl_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_u8(a, b);
|
||||
#else
|
||||
return simde_vreinterpretq_u16_s16(
|
||||
simde_vabsq_s16(
|
||||
simde_vsubq_s16(
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)),
|
||||
simde_vreinterpretq_s16_u16(simde_vmovl_u8(b))
|
||||
)
|
||||
)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_u8
|
||||
#define vabdl_u8(a, b) simde_vabdl_u8((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint32x4_t
|
||||
simde_vabdl_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_u16(a, b);
|
||||
#else
|
||||
return simde_vreinterpretq_u32_s32(
|
||||
simde_vabsq_s32(
|
||||
simde_vsubq_s32(
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)),
|
||||
simde_vreinterpretq_s32_u32(simde_vmovl_u16(b))
|
||||
)
|
||||
)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_u16
|
||||
#define vabdl_u16(a, b) simde_vabdl_u16((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_uint64x2_t
|
||||
simde_vabdl_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabdl_u32(a, b);
|
||||
#else
|
||||
return simde_vreinterpretq_u64_s64(
|
||||
simde_vabsq_s64(
|
||||
simde_vsubq_s64(
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)),
|
||||
simde_vreinterpretq_s64_u64(simde_vmovl_u32(b))
|
||||
)
|
||||
)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabdl_u32
|
||||
#define vabdl_u32(a, b) simde_vabdl_u32((a), (b))
|
||||
#endif
|
||||
|
||||
SIMDE_END_DECLS_
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_ARM_NEON_ABDL_H) */
|
431
lib/simd_wrapper/simde/arm/neon/abs.h
Normal file
431
lib/simd_wrapper/simde/arm/neon/abs.h
Normal file
@ -0,0 +1,431 @@
|
||||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_ABS_H)
|
||||
#define SIMDE_ARM_NEON_ABS_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
SIMDE_BEGIN_DECLS_
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
int64_t
|
||||
simde_vabsd_s64(int64_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0))
|
||||
return vabsd_s64(a);
|
||||
#else
|
||||
return a < 0 ? -a : a;
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabsd_s64
|
||||
#define vabsd_s64(a) simde_vabsd_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x2_t
|
||||
simde_vabs_f32(simde_float32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabs_f32(a);
|
||||
#else
|
||||
simde_float32x2_private
|
||||
r_,
|
||||
a_ = simde_float32x2_to_private(a);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
|
||||
return simde_float32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_f32
|
||||
#define vabs_f32(a) simde_vabs_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x1_t
|
||||
simde_vabs_f64(simde_float64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabs_f64(a);
|
||||
#else
|
||||
simde_float64x1_private
|
||||
r_,
|
||||
a_ = simde_float64x1_to_private(a);
|
||||
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
|
||||
return simde_float64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_f64
|
||||
#define vabs_f64(a) simde_vabs_f64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int8x8_t
|
||||
simde_vabs_s8(simde_int8x8_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabs_s8(a);
|
||||
#else
|
||||
simde_int8x8_private
|
||||
r_,
|
||||
a_ = simde_int8x8_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_abs_pi8(a_.m64);
|
||||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762)
|
||||
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
|
||||
r_.values = (-a_.values & m) | (a_.values & ~m);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int8x8_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_s8
|
||||
#define vabs_s8(a) simde_vabs_s8(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int16x4_t
|
||||
simde_vabs_s16(simde_int16x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabs_s16(a);
|
||||
#else
|
||||
simde_int16x4_private
|
||||
r_,
|
||||
a_ = simde_int16x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_abs_pi16(a_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
|
||||
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
|
||||
r_.values = (-a_.values & m) | (a_.values & ~m);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int16x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_s16
|
||||
#define vabs_s16(a) simde_vabs_s16(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int32x2_t
|
||||
simde_vabs_s32(simde_int32x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabs_s32(a);
|
||||
#else
|
||||
simde_int32x2_private
|
||||
r_,
|
||||
a_ = simde_int32x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
||||
r_.m64 = _mm_abs_pi32(a_.m64);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761)
|
||||
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
|
||||
r_.values = (-a_.values & m) | (a_.values & ~m);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int32x2_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_s32
|
||||
#define vabs_s32(a) simde_vabs_s32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_int64x1_t
|
||||
simde_vabs_s64(simde_int64x1_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabs_s64(a);
|
||||
#else
|
||||
simde_int64x1_private
|
||||
r_,
|
||||
a_ = simde_int64x1_to_private(a);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
__typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
|
||||
r_.values = (-a_.values & m) | (a_.values & ~m);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_int64x1_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabs_s64
|
||||
#define vabs_s64(a) simde_vabs_s64(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float32x4_t
|
||||
simde_vabsq_f32(simde_float32x4_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
return vabsq_f32(a);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
return vec_abs(a);
|
||||
#else
|
||||
simde_float32x4_private
|
||||
r_,
|
||||
a_ = simde_float32x4_to_private(a);
|
||||
|
||||
#if defined(SIMDE_WASM_SIMD128_NATIVE)
|
||||
r_.v128 = wasm_f32x4_abs(a_.v128);
|
||||
#elif defined(SIMDE_X86_SSE_NATIVE)
|
||||
simde_float32 mask_;
|
||||
uint32_t u32_ = UINT32_C(0x7FFFFFFF);
|
||||
simde_memcpy(&mask_, &u32_, sizeof(u32_));
|
||||
r_.m128 = _mm_and_ps(_mm_set1_ps(mask_), a_.m128);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
|
||||
r_.values[i] = simde_math_fabsf(a_.values[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return simde_float32x4_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
|
||||
#undef vabsq_f32
|
||||
#define vabsq_f32(a) simde_vabsq_f32(a)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde_float64x2_t
|
||||
simde_vabsq_f64(simde_float64x2_t a) {
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
return vabsq_f64(a);
|
||||
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
||||
return vec_abs(a);
|
||||
#else
|
||||
simde_float64x2_private
|
||||
r_,
|
||||
a_ = simde_float64x2_to_private(a);
|
||||
|
||||
#if defined(SIMDE_X86_SSE2_NATIVE)
|
||||
simde_float64 mask_;
|
||||
uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
|
||||
simde_memcpy(&mask_, &u64_, sizeof(u64_));
|
||||
r_.m128d = _mm_and_pd(_mm_set1_pd(mask_), a_.m128d);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||