Use SIMDe in graphics_utils

2022-04-29 11:28:47 +08:00 · 2022-04-29 11:28:47 +08:00 · 98eb912a76
commit 98eb912a76
parent 383bd93261
4 changed files with 16 additions and 42 deletions
--- a/android/Android.mk
+++ b/android/Android.mk
@ -153,7 +153,8 @@ LOCAL_MODULE       := graphics_utils
 LOCAL_PATH         := .
 LOCAL_CPP_FEATURES += rtti
 LOCAL_SRC_FILES    := $(wildcard ../lib/graphics_utils/mipmap/*.c)
-LOCAL_CFLAGS       := -I../lib/graphics_utils/mipmap
+LOCAL_CFLAGS       := -I../lib/graphics_utils/mipmap \
+                      -I../lib/simd_wrapper
 ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
 LOCAL_ARM_NEON     := false
 endif
--- a/lib/graphics_utils/CMakeLists.txt
+++ b/lib/graphics_utils/CMakeLists.txt
@ -1,4 +1,5 @@
 cmake_minimum_required(VERSION 2.6)
+include_directories("${PROJECT_SOURCE_DIR}/lib/simd_wrapper")
 if (UNIX OR MINGW)
    add_definitions(-O3)
 endif()
--- a/lib/graphics_utils/mipmap/cpusimd.h
+++ b/lib/graphics_utils/mipmap/cpusimd.h
@ -32,36 +32,8 @@


 ////
+#include <simd_wrapper.h>

-
-#if __MMX__ || CPU_ENABLE_MMX
- #include <mmintrin.h>
- #define CPU_MMX_SUPPORT (1)
-#endif
-#if __SSE__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 1 ) ) || CPU_ENABLE_SSE
- #include <xmmintrin.h>
- #define CPU_SSE_SUPPORT (1)
-#endif
-#if __SSE2__ || defined(_M_X64) || ( defined(_M_IX86_FP) && ( _M_IX86_FP >= 2 ) ) || CPU_ENABLE_SSE2
- #include <emmintrin.h>
- #define CPU_SSE2_SUPPORT (1)
-#endif
-#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
- #include <pmmintrin.h>
- #define CPU_SSE3_SUPPORT (1)
-#endif
-#if __SSSE3__ || __AVX__  || CPU_ENABLE_SSSE3
- #include <tmmintrin.h>
- #define CPU_SSSE3_SUPPORT (1)
-#endif
-#if __SSE4_1__ || __AVX__  || CPU_ENABLE_SSE4_1
- #include <smmintrin.h>
- #define CPU_SSE4_1_SUPPORT (1)
-#endif
-#if __SSE4_2__ || CPU_ENABLE_SSE4_2
- #include <nmmintrin.h>
- #define CPU_SSE4_2_SUPPORT (1)
-#endif
 #if __SSE4A__ || CPU_ENABLE_SSE4A
 #include <ammintrin.h>
 #define CPU_SSE4A_SUPPORT (1)
@ -355,7 +327,7 @@ static inline CC_ALWAYSINLINE __m128 simd4f_pow5d12_inline_ps( __m128 vx )
 static inline void simdPrintDebugSSE4f( char *str, __m128 v )
 {
  float CPU_ALIGN16 store[4];
-  _mm_store_ps( (void *)store, v );
+  _mm_store_ps( (float *)store, v );
  printf( "%s %f %f %f %f\n", str, (double)store[0], (double)store[1], (double)store[2], (double)store[3] );
  return;
 }
@ -363,7 +335,7 @@ static inline void simdPrintDebugSSE4f( char *str, __m128 v )
 static inline void simdPrintDebugSSE2d( char *str, __m128d v )
 {
  double CPU_ALIGN16 store[2];
-  _mm_store_pd( (void *)store, v );
+  _mm_store_pd( (double *)store, v );
  printf( "%s %f %f\n", str, store[0], store[1] );
  return;
 }
--- a/lib/graphics_utils/mipmap/imgresize.c
+++ b/lib/graphics_utils/mipmap/imgresize.c
@ -776,7 +776,7 @@ static void imStaticKernel4Linear( unsigned char *dst, int pointx, int pointy, i
  }

 #if CPU_SSE2_SUPPORT
-  _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
 #else
  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
@ -825,7 +825,7 @@ static void imStaticKernel4Linear_Core( unsigned char *dst, int pointx, int poin
      mapy = 0;
  }

-  _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );

  return;
 }
@ -1091,7 +1091,7 @@ static void imStaticKernel2sRGB( unsigned char *dst, int pointx, int pointy, imS
    uint32_t i;
  } u;
  vsum = linear2srgb3( vsum );
-  _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
  dst[0] = u.c[0];
  dst[1] = u.c[1];
 #else
@ -1157,7 +1157,7 @@ static void imStaticKernel3sRGB( unsigned char *dst, int pointx, int pointy, imS
    uint32_t i;
  } u;
  vsum = linear2srgb3( vsum );
-  _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
  dst[0] = u.c[0];
  dst[1] = u.c[1];
  dst[2] = u.c[2];
@ -1222,7 +1222,7 @@ static void imStaticKernel4sRGB( unsigned char *dst, int pointx, int pointy, imS

 #if CPU_SSE2_SUPPORT
  vsum = linear2srgb3( vsum );
-  _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
 #else
  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
@ -1291,7 +1291,7 @@ static void imStaticKernel3sRGB_Core( unsigned char *dst, int pointx, int pointy
 #endif

  vsum0 = linear2srgb3( vsum0 );
-  _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) );
  dst[0] = u.c[0];
  dst[1] = u.c[1];
  dst[2] = u.c[2];
@ -1332,7 +1332,7 @@ static void imStaticKernel4sRGB_Core( unsigned char *dst, int pointx, int pointy
  }

  vsum = linear2srgb3( vsum );
-  _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );

  return;
 }
@ -2729,7 +2729,7 @@ static void imDynamicKernel2sRGB( unsigned char *dst, imGenericMatrixState *stat
    uint32_t i;
  } u;
  vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
-  _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
  dst[0] = u.c[0];
  dst[1] = u.c[1];
 #else
@ -2800,7 +2800,7 @@ static void imDynamicKernel3sRGB( unsigned char *dst, imGenericMatrixState *stat
    uint32_t i;
  } u;
  vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
-  _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
  dst[0] = u.c[0];
  dst[1] = u.c[1];
  dst[2] = u.c[2];
@ -2871,7 +2871,7 @@ static void imDynamicKernel4sRGB( unsigned char *dst, imGenericMatrixState *stat

 #if CPU_SSE2_SUPPORT
  vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
-  _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+  _mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
 #else
  matrixsum = 1.0f / matrixsum;
  sum0 *= matrixsum;