Use simde on graphics_utils
This commit is contained in:
parent
383bd93261
commit
98eb912a76
@ -153,7 +153,8 @@ LOCAL_MODULE := graphics_utils
|
||||
LOCAL_PATH := .
|
||||
LOCAL_CPP_FEATURES += rtti
|
||||
LOCAL_SRC_FILES := $(wildcard ../lib/graphics_utils/mipmap/*.c)
|
||||
LOCAL_CFLAGS := -I../lib/graphics_utils/mipmap
|
||||
LOCAL_CFLAGS := -I../lib/graphics_utils/mipmap \
|
||||
-I../lib/simd_wrapper
|
||||
ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
|
||||
LOCAL_ARM_NEON := false
|
||||
endif
|
||||
|
@ -1,4 +1,5 @@
|
||||
cmake_minimum_required(VERSION 2.6)
|
||||
include_directories("${PROJECT_SOURCE_DIR}/lib/simd_wrapper")
|
||||
if (UNIX OR MINGW)
|
||||
add_definitions(-O3)
|
||||
endif()
|
||||
|
@ -32,36 +32,8 @@
|
||||
|
||||
|
||||
////
|
||||
#include <simd_wrapper.h>
|
||||
|
||||
|
||||
#if __MMX__ || CPU_ENABLE_MMX
|
||||
#include <mmintrin.h>
|
||||
#define CPU_MMX_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 1 ) ) || CPU_ENABLE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#define CPU_SSE_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE2__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 2 ) ) || CPU_ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define CPU_SSE2_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#define CPU_SSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#define CPU_SSSE3_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#define CPU_SSE4_1_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4_2__ || CPU_ENABLE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#define CPU_SSE4_2_SUPPORT (1)
|
||||
#endif
|
||||
#if __SSE4A__ || CPU_ENABLE_SSE4A
|
||||
#include <ammintrin.h>
|
||||
#define CPU_SSE4A_SUPPORT (1)
|
||||
@ -355,7 +327,7 @@ static inline CC_ALWAYSINLINE __m128 simd4f_pow5d12_inline_ps( __m128 vx )
|
||||
static inline void simdPrintDebugSSE4f( char *str, __m128 v )
|
||||
{
|
||||
float CPU_ALIGN16 store[4];
|
||||
_mm_store_ps( (void *)store, v );
|
||||
_mm_store_ps( (float *)store, v );
|
||||
printf( "%s %f %f %f %f\n", str, (double)store[0], (double)store[1], (double)store[2], (double)store[3] );
|
||||
return;
|
||||
}
|
||||
@ -363,7 +335,7 @@ static inline void simdPrintDebugSSE4f( char *str, __m128 v )
|
||||
static inline void simdPrintDebugSSE2d( char *str, __m128d v )
|
||||
{
|
||||
double CPU_ALIGN16 store[2];
|
||||
_mm_store_pd( (void *)store, v );
|
||||
_mm_store_pd( (double *)store, v );
|
||||
printf( "%s %f %f\n", str, store[0], store[1] );
|
||||
return;
|
||||
}
|
||||
|
@ -776,7 +776,7 @@ static void imStaticKernel4Linear( unsigned char *dst, int pointx, int pointy, i
|
||||
}
|
||||
|
||||
#if CPU_SSE2_SUPPORT
|
||||
_mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
#else
|
||||
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
||||
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
||||
@ -825,7 +825,7 @@ static void imStaticKernel4Linear_Core( unsigned char *dst, int pointx, int poin
|
||||
mapy = 0;
|
||||
}
|
||||
|
||||
_mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
|
||||
return;
|
||||
}
|
||||
@ -1091,7 +1091,7 @@ static void imStaticKernel2sRGB( unsigned char *dst, int pointx, int pointy, imS
|
||||
uint32_t i;
|
||||
} u;
|
||||
vsum = linear2srgb3( vsum );
|
||||
_mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
dst[0] = u.c[0];
|
||||
dst[1] = u.c[1];
|
||||
#else
|
||||
@ -1157,7 +1157,7 @@ static void imStaticKernel3sRGB( unsigned char *dst, int pointx, int pointy, imS
|
||||
uint32_t i;
|
||||
} u;
|
||||
vsum = linear2srgb3( vsum );
|
||||
_mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
dst[0] = u.c[0];
|
||||
dst[1] = u.c[1];
|
||||
dst[2] = u.c[2];
|
||||
@ -1222,7 +1222,7 @@ static void imStaticKernel4sRGB( unsigned char *dst, int pointx, int pointy, imS
|
||||
|
||||
#if CPU_SSE2_SUPPORT
|
||||
vsum = linear2srgb3( vsum );
|
||||
_mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
#else
|
||||
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
||||
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
||||
@ -1291,7 +1291,7 @@ static void imStaticKernel3sRGB_Core( unsigned char *dst, int pointx, int pointy
|
||||
#endif
|
||||
|
||||
vsum0 = linear2srgb3( vsum0 );
|
||||
_mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) );
|
||||
dst[0] = u.c[0];
|
||||
dst[1] = u.c[1];
|
||||
dst[2] = u.c[2];
|
||||
@ -1332,7 +1332,7 @@ static void imStaticKernel4sRGB_Core( unsigned char *dst, int pointx, int pointy
|
||||
}
|
||||
|
||||
vsum = linear2srgb3( vsum );
|
||||
_mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
|
||||
return;
|
||||
}
|
||||
@ -2729,7 +2729,7 @@ static void imDynamicKernel2sRGB( unsigned char *dst, imGenericMatrixState *stat
|
||||
uint32_t i;
|
||||
} u;
|
||||
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
||||
_mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
dst[0] = u.c[0];
|
||||
dst[1] = u.c[1];
|
||||
#else
|
||||
@ -2800,7 +2800,7 @@ static void imDynamicKernel3sRGB( unsigned char *dst, imGenericMatrixState *stat
|
||||
uint32_t i;
|
||||
} u;
|
||||
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
||||
_mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
dst[0] = u.c[0];
|
||||
dst[1] = u.c[1];
|
||||
dst[2] = u.c[2];
|
||||
@ -2871,7 +2871,7 @@ static void imDynamicKernel4sRGB( unsigned char *dst, imGenericMatrixState *stat
|
||||
|
||||
#if CPU_SSE2_SUPPORT
|
||||
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
||||
_mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
||||
#else
|
||||
matrixsum = 1.0f / matrixsum;
|
||||
sum0 *= matrixsum;
|
||||
|
Loading…
x
Reference in New Issue
Block a user