diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b8e74b41..f8e450921 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,15 +107,28 @@ if((WIN32 AND NOT MINGW) OR APPLE)
add_subdirectory("${PROJECT_SOURCE_DIR}/lib/libpng")
include_directories("${PROJECT_SOURCE_DIR}/lib/libpng")
- #build jpeg library
- add_subdirectory("${PROJECT_SOURCE_DIR}/lib/jpeglib")
- include_directories("${PROJECT_SOURCE_DIR}/lib/jpeglib")
-
set(PNG_PNG_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/lib/libpng/")
set(PNG_LIBRARY png15_static)
+endif()
+
+# JPEG: build the bundled lib/jpeglib on Apple, use the system library elsewhere
+if (APPLE)
+ add_subdirectory("${PROJECT_SOURCE_DIR}/lib/jpeglib")
+ include_directories("${PROJECT_SOURCE_DIR}/lib/jpeglib")
set(JPEG_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/lib/jpeglib/")
set(JPEG_LIBRARY jpeglib)
+ # supertuxkart links ${JPEG_LIBRARIES}; find_package(JPEG) sets it, so mirror it for the bundled build
+ set(JPEG_LIBRARIES ${JPEG_LIBRARY})
+else()
+ find_package(JPEG REQUIRED)
+ include_directories(${JPEG_INCLUDE_DIR})
endif()
+
+if(NOT SERVER_ONLY AND NOT USE_GLES2)
+ add_subdirectory("${PROJECT_SOURCE_DIR}/lib/graphics_utils")
+ include_directories("${PROJECT_SOURCE_DIR}/lib/graphics_utils")
+endif()
+
# Build the irrlicht library
add_subdirectory("${PROJECT_SOURCE_DIR}/lib/irrlicht")
include_directories("${PROJECT_SOURCE_DIR}/lib/irrlicht/include")
@@ -368,11 +381,12 @@ target_link_libraries(supertuxkart
${OGGVORBIS_LIBRARIES}
${OPENAL_LIBRARY}
${FREETYPE_LIBRARIES}
+ ${JPEG_LIBRARIES}
)
if(NOT SERVER_ONLY)
if(NOT USE_GLES2)
- target_link_libraries(supertuxkart ${OPENGL_LIBRARIES} glew)
+ target_link_libraries(supertuxkart ${OPENGL_LIBRARIES} glew graphics_utils)
else()
target_link_libraries(supertuxkart EGL GLESv2)
endif()
diff --git a/android/generate_assets.sh b/android/generate_assets.sh
index 2fdec9214..30bf65df2 100755
--- a/android/generate_assets.sh
+++ b/android/generate_assets.sh
@@ -29,13 +29,16 @@ export ASSETS_PATHS="../data \
export ASSETS_DIRS="library models music sfx textures"
export TEXTURE_SIZE=256
-export SOUND_QUALITY=64
+export SOUND_QUALITY=42  # handed to "oggenc -b" in convert_sound
+export SOUND_MONO=1  # values above 0 add --downmix to the oggenc call
+export SOUND_SAMPLE=32000  # intended resample rate (same value as --resample in convert_sound)
export RUN_OPTIMIZE_SCRIPT=0
export DECREASE_QUALITY=1
################################################################################
+export LANG=C  # presumably keeps tool output such as the ogginfo "Rate: " line untranslated - confirm
cd "`dirname "$0"`"
@@ -191,7 +194,22 @@ convert_sound()
oggdec "$FILE" -o tmp.wav
if [ -s tmp.wav ]; then
- oggenc --downmix -b $SOUND_QUALITY tmp.wav -o tmp.ogg
+ OGGENC_CMD=""
+ # Downmix to one channel when SOUND_MONO is greater than 0.
+ if [ "$SOUND_MONO" -gt 0 ]; then
+     OGGENC_CMD="$OGGENC_CMD --downmix"
+ fi
+ # Take the source sample rate from the "Rate: " line printed by ogginfo.
+ OGG_RATE=`ogginfo "$FILE" | grep "Rate: " | cut -f 2 -d " " \
+           | grep -o '[0-9]*'`
+ # Resample only files whose rate is above SOUND_SAMPLE (was a hard-coded 32000).
+ if [ -n "$OGG_RATE" ] && [ "$OGG_RATE" -gt "$SOUND_SAMPLE" ]; then
+     OGGENC_CMD="$OGGENC_CMD --resample $SOUND_SAMPLE"
+ fi
+ # SOUND_QUALITY is handed to oggenc as its -b value.
+ OGGENC_CMD="$OGGENC_CMD -b $SOUND_QUALITY"
+ # OGGENC_CMD stays unquoted on purpose so it splits into separate options.
+ oggenc $OGGENC_CMD tmp.wav -o tmp.ogg
fi
if [ -s tmp.ogg ]; then
diff --git a/data/gui/custom_video_settings.stkgui b/data/gui/custom_video_settings.stkgui
index 3a0a1b9a8..be2b29448 100644
--- a/data/gui/custom_video_settings.stkgui
+++ b/data/gui/custom_video_settings.stkgui
@@ -147,16 +147,8 @@
-
-
-
-
@@ -168,9 +160,9 @@
-
+
-
+
diff --git a/data/shaders/object_pass2.frag b/data/shaders/object_pass2.frag
index 838570ead..93f0a0fb0 100644
--- a/data/shaders/object_pass2.frag
+++ b/data/shaders/object_pass2.frag
@@ -40,7 +40,7 @@ void main(void)
col = vec4(new_color.r, new_color.g, new_color.b, col.a);
}
-#ifdef GL_ES
+#if defined(GL_ES) && !defined(Advanced_Lighting_Enabled) // GLES without advanced lighting multiplies by color as-is; every other build applies pow(2.2) first
col.xyz *= color.xyz;
#else
col.xyz *= pow(color.xyz, vec3(2.2));
diff --git a/data/shaders/object_unlit.frag b/data/shaders/object_unlit.frag
index 15c775fbd..d13305612 100644
--- a/data/shaders/object_unlit.frag
+++ b/data/shaders/object_unlit.frag
@@ -16,7 +16,13 @@ void main(void)
col.xyz = pow(col.xyz, vec3(2.2));
#endif
#endif
- col.xyz *= pow(color.xyz, vec3(2.2));
if (col.a < 0.5) discard;
+
+#if defined(GL_ES) && !defined(Advanced_Lighting_Enabled)
+ col.xyz *= color.xyz; // GLES without advanced lighting: multiply by color unchanged
+#else
+ col.xyz *= pow(color.xyz, vec3(2.2)); // every other build: raise color to the power 2.2 first
+#endif
+
FragColor = vec4(col.xyz, 1.);
}
diff --git a/data/shaders/objectref_pass2.frag b/data/shaders/objectref_pass2.frag
index a6374bd0d..3dacf6bdb 100644
--- a/data/shaders/objectref_pass2.frag
+++ b/data/shaders/objectref_pass2.frag
@@ -25,8 +25,13 @@ void main(void)
col.xyz = pow(col.xyz, vec3(2.2));
#endif
#endif
- col.xyz *= pow(color.xyz, vec3(2.2));
if (col.a * color.a < 0.5) discard;
+
+#if defined(GL_ES) && !defined(Advanced_Lighting_Enabled)
+ col.xyz *= color.xyz; // GLES without advanced lighting: multiply by color unchanged
+#else
+ col.xyz *= pow(color.xyz, vec3(2.2)); // every other build: raise color to the power 2.2 first
+#endif
float mask = texture(colorization_mask, uv).a;
if (color_change.x > 0.0)
diff --git a/data/shaders/tonemap.frag b/data/shaders/tonemap.frag
index 13c6b32ce..251f18ac1 100644
--- a/data/shaders/tonemap.frag
+++ b/data/shaders/tonemap.frag
@@ -13,7 +13,9 @@ void main()
// Uncharted2 tonemap with Auria's custom coefficients
vec4 perChannel = (col * (6.9 * col + .5)) / (col * (5.2 * col + 1.7) + 0.06);
+#if !(defined(GL_ES) && defined(Advanced_Lighting_Enabled)) // the pow(2.2) below is skipped only on GLES builds with advanced lighting
perChannel = pow(perChannel, vec4(2.2));
+#endif // !(GL_ES && Advanced_Lighting_Enabled)
vec2 inside = uv - 0.5;
float vignette = 1. - dot(inside, inside) * vignette_weight;
diff --git a/data/shaders/utils/getLightFactor.frag b/data/shaders/utils/getLightFactor.frag
index 4fb754069..af0feed66 100644
--- a/data/shaders/utils/getLightFactor.frag
+++ b/data/shaders/utils/getLightFactor.frag
@@ -14,10 +14,6 @@ vec3 getLightFactor(vec3 diffuseMatColor, vec3 specularMatColor, float specMapVa
vec3 DiffuseComponent = texture(DiffuseMap, tc).xyz;
vec3 SpecularComponent = texture(SpecularMap, tc).xyz;
float ao = texture(SSAO, tc).x;
-#ifdef GL_ES
- DiffuseComponent = pow(DiffuseComponent, vec3(1. / 2.2));
- SpecularComponent = pow(SpecularComponent, vec3(1. / 2.2));
-#endif
vec3 tmp = diffuseMatColor * DiffuseComponent * (1. - specMapValue) + specularMatColor * SpecularComponent * specMapValue;
vec3 emitCol = diffuseMatColor.xyz * diffuseMatColor.xyz * diffuseMatColor.xyz * 15.;
return tmp * ao + (emitMapValue * emitCol);
diff --git a/lib/graphics_utils/CMakeLists.txt b/lib/graphics_utils/CMakeLists.txt
new file mode 100644
index 000000000..0c238f0b6
--- /dev/null
+++ b/lib/graphics_utils/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 2.6)
+if (UNIX OR MINGW)
+ add_definitions(-O3)  # passes -O3 to every source in this directory, for all build types
+endif()
+add_library(graphics_utils STATIC  # static library; the top-level file links it into supertuxkart for non-GLES2 client builds
+ mipmap/cpusimd.c
+ mipmap/imgresize.c
+)
diff --git a/lib/graphics_utils/mipmap/cpusimd.c b/lib/graphics_utils/mipmap/cpusimd.c
new file mode 100644
index 000000000..6e29dd3bb
--- /dev/null
+++ b/lib/graphics_utils/mipmap/cpusimd.c
@@ -0,0 +1,571 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2008-2016 Alexis Naveros.
+ *
+ *
+ * The SIMD trigonometry functions are Copyright (C) 2007 Julien Pommier
+ * See copyright notice for simd4f_sin_ps(), simd4f_cos_ps(), simd4f_sincos_ps()
+ *
+ *
+ * Some functions are Copyright (C) 2008 José Fonseca
+ * See copyright notice for simd4f_exp2_ps(), simd4f_log2_ps(), simd4f_pow_ps()
+ *
+ *
+ * Portions developed under contract to the SURVICE Engineering Company.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * -----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include "cpusimd.h"
+
+
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846
+#endif
+
+
+////
+
+
+#if CPU_SSE_SUPPORT /* 16-byte aligned constant tables, four 32-bit lanes each, loaded by the SSE code through __m128 / __m128i casts */
+
+const uint32_t CPU_ALIGN16 simd4fSignMask[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+const uint32_t CPU_ALIGN16 simd4fSignMaskInv[4] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
+const float CPU_ALIGN16 simd4fHalf[4] = { 0.5, 0.5, 0.5, 0.5 };
+const float CPU_ALIGN16 simd4fOne[4] = { 1.0, 1.0, 1.0, 1.0 };
+const float CPU_ALIGN16 simd4fTwo[4] = { 2.0, 2.0, 2.0, 2.0 };
+const float CPU_ALIGN16 simd4fThree[4] = { 3.0, 3.0, 3.0, 3.0 };
+const uint32_t CPU_ALIGN16 simd4uOne[4] = { 1, 1, 1, 1 };
+const uint32_t CPU_ALIGN16 simd4uOneInv[4] = { ~1, ~1, ~1, ~1 }; /* 0xfffffffe per lane: clears bit 0, used for j=(j+1)&(~1) in simd4f_sin_ps() */
+const uint32_t CPU_ALIGN16 simd4uTwo[4] = { 2, 2, 2, 2 };
+const uint32_t CPU_ALIGN16 simd4uFour[4] = { 4, 4, 4, 4 };
+const float CPU_ALIGN16 simd4fQuarter[4] = { 0.25, 0.25, 0.25, 0.25 };
+const float CPU_ALIGN16 simd4fPi[4] = { M_PI, M_PI, M_PI, M_PI };
+const float CPU_ALIGN16 simd4fZeroOneTwoThree[4] = { 0.0, 1.0, 2.0, 3.0 };
+const uint32_t CPU_ALIGN16 simd4fAlphaMask[4] = { 0x00000000, 0x00000000, 0x00000000, 0xffffffff }; /* only the fourth lane has all bits set */
+const float CPU_ALIGN16 simd4f255[4] = { 255.0f, 255.0f, 255.0f, 255.0f };
+const float CPU_ALIGN16 simd4f255Inv[4] = { 1.0f/255.0f, 1.0f/255.0f, 1.0f/255.0f, 1.0f/255.0f };
+
+#endif
+
+
+////
+
+
+#if CPU_SSE2_SUPPORT
+
+
+/* Copyright (C) 2007 Julien Pommier
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ (this is the zlib license)
+*/
+
+static const float CPU_ALIGN16 simd4f_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; /* 4/Pi, the scale applied to |x| in simd4f_sin_ps() */
+static const float CPU_ALIGN16 simd4f_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; /* DP1 + DP2 + DP3 add up to Pi/4 (stored negated) */
+static const float CPU_ALIGN16 simd4f_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
+static const float CPU_ALIGN16 simd4f_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
+static const float CPU_ALIGN16 simd4f_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; /* sincof_p0..p2: presumably the sine polynomial coefficients - confirm against the Cephes sources */
+static const float CPU_ALIGN16 simd4f_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
+static const float CPU_ALIGN16 simd4f_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
+static const float CPU_ALIGN16 simd4f_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; /* coscof_p0..p2: presumably the cosine polynomial coefficients - confirm */
+static const float CPU_ALIGN16 simd4f_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
+static const float CPU_ALIGN16 simd4f_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
+
+__m128 simd4f_sin_ps( __m128 x )
+{
+ __m128 xmm1, xmm2, xmm3, sign_bit, y;
+ __m128i emm0, emm2;
+
+ xmm2 = _mm_setzero_ps();
+
+ sign_bit = x;
+ /* take the absolute value */
+ x = _mm_and_ps( x, *(__m128 *)simd4fSignMaskInv );
+ /* extract the sign bit (upper one) */
+ sign_bit = _mm_and_ps(sign_bit, *(__m128 *)simd4fSignMask);
+
+ /* scale by 4/Pi */
+ y = _mm_mul_ps(x, *(__m128 *)simd4f_cephes_FOPI);
+
+ /* store the integer part of y in mm0 */
+ emm2 = _mm_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, *(__m128i*)simd4uOne);
+ emm2 = _mm_and_si128(emm2, *(__m128i*)simd4uOneInv);
+ y = _mm_cvtepi32_ps(emm2);
+
+ /* get the swap sign flag */
+ emm0 = _mm_and_si128(emm2, *(__m128i*)simd4uFour);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ /* get the polynom selection mask
+ there is one polynom for 0 <= x <= Pi/4
+ and another one for Pi/4
+ #define CPU_MMX_SUPPORT (1)
+#endif
+#if __SSE__ || _M_X64 || _M_IX86_FP >= 1 || CPU_ENABLE_SSE
+ #include
+ #define CPU_SSE_SUPPORT (1)
+#endif
+#if __SSE2__ || _M_X64 || _M_IX86_FP >= 2 || CPU_ENABLE_SSE2
+ #include
+ #define CPU_SSE2_SUPPORT (1)
+#endif
+#if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3
+ #include
+ #define CPU_SSE3_SUPPORT (1)
+#endif
+#if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3
+ #include
+ #define CPU_SSSE3_SUPPORT (1)
+#endif
+#if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1
+ #include
+ #define CPU_SSE4_1_SUPPORT (1)
+#endif
+#if __SSE4_2__ || CPU_ENABLE_SSE4_2
+ #include
+ #define CPU_SSE4_2_SUPPORT (1)
+#endif
+#if __SSE4A__ || CPU_ENABLE_SSE4A
+ #include
+ #define CPU_SSE4A_SUPPORT (1)
+#endif
+#if __AVX__ || CPU_ENABLE_AVX
+ #include
+ #define CPU_AVX_SUPPORT (1)
+#endif
+#if __AVX2__ || CPU_ENABLE_AVX2
+ #include
+ #define CPU_AVX2_SUPPORT (1)
+#endif
+#if __XOP__ || CPU_ENABLE_XOP
+ #include
+ #define CPU_XOP_SUPPORT (1)
+#endif
+#if __FMA3__ || CPU_ENABLE_FMA3
+ #include
+ #define CPU_FMA3_SUPPORT (1)
+#endif
+#if __FMA4__ || CPU_ENABLE_FMA4
+ #include
+ #define CPU_FMA4_SUPPORT (1)
+#endif
+#if __RDRND__ || CPU_ENABLE_RDRND
+ #include
+ #define CPU_RDRND_SUPPORT (1)
+#endif
+#if __POPCNT__ || CPU_ENABLE_POPCNT
+ #include
+ #define CPU_POPCNT_SUPPORT (1)
+#endif
+#if __LZCNT__ || CPU_ENABLE_LZCNT
+ #include
+ #define CPU_LZCNT_SUPPORT (1)
+#endif
+#if __F16C__ || CPU_ENABLE_F16C
+ #include
+ #define CPU_F16C_SUPPORT (1)
+#endif
+#if __BMI__ || CPU_ENABLE_BMI
+ #include
+ #define CPU_BMI_SUPPORT (1)
+#endif
+#if __BMI2__ || CPU_ENABLE_BMI2
+ #include
+ #define CPU_BMI2_SUPPORT (1)
+#endif
+#if __TBM__ || CPU_ENABLE_TBM
+ #include
+ #define CPU_TBM_SUPPORT (1)
+#endif
+
+
+
+/* Alignment qualifiers: CPU_ALIGN16/32/64 expand to the compiler's own alignment syntax. */
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+ #define CPU_ALIGN16 __attribute__((aligned(16)))
+ #define CPU_ALIGN32 __attribute__((aligned(32)))
+ #define CPU_ALIGN64 __attribute__((aligned(64)))
+#elif defined(_MSC_VER)
+ #define CPU_ALIGN16 __declspec(align(16))
+ /* CPU_ALIGN32 was the only qualifier missing from this branch */
+ #define CPU_ALIGN32 __declspec(align(32))
+ #define CPU_ALIGN64 __declspec(align(64))
+#else
+ /* Unknown compiler: the qualifiers expand to nothing and the SIMD feature macros are withdrawn. */
+ #define CPU_ALIGN16
+ #define CPU_ALIGN32
+ #define CPU_ALIGN64
+ #warning "SSE/AVX Disabled: Unsupported Compiler."
+ #undef CPU_SSE_SUPPORT
+ #undef CPU_SSE2_SUPPORT
+ #undef CPU_SSE3_SUPPORT
+ #undef CPU_SSSE3_SUPPORT
+ #undef CPU_SSE4_1_SUPPORT
+ #undef CPU_SSE4_2_SUPPORT
+ #undef CPU_AVX_SUPPORT
+ #undef CPU_AVX2_SUPPORT
+ #undef CPU_XOP_SUPPORT
+ #undef CPU_FMA3_SUPPORT
+ #undef CPU_FMA4_SUPPORT
+#endif
+
+
+////
+
+
+/*
+ * Scalar helpers for z/w, sqrt(z), 1/sqrt(z) and z/sqrt(w).
+ * With SSE they are built on _mm_rcp_ss / _mm_rsqrt_ss; without SSE they
+ * fall back to plain division and sqrtf().
+ * CPU_APPROX_SQRT_FLOAT is computed as z * rsqrt(z) on the SSE path.
+ */
+#if CPU_SSE_SUPPORT
+ #define CPU_APPROX_DIV_FLOAT(z,w) _mm_cvtss_f32(_mm_mul_ss(_mm_set_ss(z),_mm_rcp_ss(_mm_set_ss(w))))
+ #define CPU_APPROX_SQRT_FLOAT(z) _mm_cvtss_f32(_mm_mul_ss(_mm_set_ss(z),_mm_rsqrt_ss(_mm_set_ss(z))))
+ #define CPU_APPROX_RSQRT_FLOAT(z) _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(z)))
+ #define CPU_APPROX_DIVSQRT_FLOAT(z,w) _mm_cvtss_f32(_mm_mul_ss(_mm_set_ss(z),_mm_rsqrt_ss(_mm_set_ss(w))))
+#else
+ #define CPU_APPROX_DIV_FLOAT(z,w) ((z)/(w))
+ #define CPU_APPROX_SQRT_FLOAT(z) (sqrtf(z))
+ /* NOTE(review): 1.0 is a double literal, so this fallback yields a double while the SSE path yields a float */
+ #define CPU_APPROX_RSQRT_FLOAT(z) (1.0/sqrtf(z))
+ #define CPU_APPROX_DIVSQRT_FLOAT(z,w) ((z)/sqrtf(w))
+#endif
+
+
+/*
+ * Horizontal adds:
+ *   CPU_HADD_PS(vx,vy) = { vx0+vx1, vx2+vx3, vy0+vy1, vy2+vy3 }
+ *   CPU_HADD_PD(vx,vy) = { vx0+vx1, vy0+vy1 }
+ * SSE3 provides both directly; the fallback rebuilds them from shuffles/unpacks.
+ * In the fallback CPU_HADD_PD evaluates each argument twice.
+ */
+#if CPU_SSE3_SUPPORT
+ #define CPU_HADD_PS(vx,vy) _mm_hadd_ps(vx,vy)
+ #define CPU_HADD_PD(vx,vy) _mm_hadd_pd(vx,vy)
+#elif CPU_SSE_SUPPORT
+ static inline __m128 CPU_HADD_PS( __m128 vx, __m128 vy )
+ {
+ __m128 vh, vl;
+ /* vh = odd lanes { vx1, vx3, vy1, vy3 }, vl = even lanes { vx0, vx2, vy0, vy2 } */
+ vh = _mm_shuffle_ps( vx, vy, _MM_SHUFFLE(3,1,3,1) );
+ vl = _mm_shuffle_ps( vx, vy, _MM_SHUFFLE(2,0,2,0) );
+ return _mm_add_ps( vh, vl );
+ }
+ /* unpacklo = { vx0, vy0 }, unpackhi = { vx1, vy1 }; their sum is the full _mm_hadd_pd result */
+ #define CPU_HADD_PD(vx,vy) _mm_add_pd(_mm_unpacklo_pd((vx),(vy)),_mm_unpackhi_pd((vx),(vy)))
+#endif
+
+
+/*
+ * Widen the four lowest 8-bit lanes of x to 32-bit integers (U8 = zero-extend,
+ * S8 = sign-extend). vzero must be an all-zero register; the SSE4.1 variants ignore it.
+ */
+#if CPU_SSE4_1_SUPPORT
+ #define CPU_CVT_U8_TO_I32(x,vzero) _mm_cvtepu8_epi32(x)
+ #define CPU_CVT_S8_TO_I32(x,vzero) _mm_cvtepi8_epi32(x)
+#elif CPU_SSE2_SUPPORT
+ #define CPU_CVT_U8_TO_I32(x,vzero) _mm_unpacklo_epi16(_mm_unpacklo_epi8((x),(vzero)),(vzero))
+static inline __m128i CPU_CVT_S8_TO_I32( __m128i vx, __m128i vzero )
+{
+ __m128i vsign;
+ /* vsign is 0xff in every byte where vx is negative, 0x00 elsewhere */
+ vsign = _mm_cmpgt_epi8( vzero, vx );
+ /* interleave value bytes with sign bytes twice to build sign-extended 32-bit lanes */
+ return _mm_unpacklo_epi16( _mm_unpacklo_epi8( vx, vsign ), _mm_unpacklo_epi8( vsign, vsign ) );
+}
+#endif
+
+
+/*
+ * Per-lane select: take y where mask is set, x elsewhere.
+ * The SSE2 fallback selects bit by bit ((~mask & x) | (y & mask)), so it matches
+ * _mm_blendv_* only when every mask lane is all-ones or all-zeros.
+ */
+#if CPU_SSE4_1_SUPPORT
+ #define CPU_BLENDV_PS(x,y,mask) _mm_blendv_ps(x,y,mask)
+ #define CPU_BLENDV_PD(x,y,mask) _mm_blendv_pd(x,y,mask)
+#elif CPU_SSE2_SUPPORT
+ #define CPU_BLENDV_PS(x,y,mask) _mm_or_ps(_mm_andnot_ps(mask,x),_mm_and_ps(y,mask))
+ #define CPU_BLENDV_PD(x,y,mask) _mm_or_pd(_mm_andnot_pd(mask,x),_mm_and_pd(y,mask))
+#endif
+
+
+
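+/*
+Fused multiply-add helpers:
+CPU_FMADD_*(f0,f1,t0) = ((f0*f1)+t0)
+CPU_FMSUB_*(f0,f1,t0) = ((f0*f1)-t0)
+Without FMA3 or FMA4 support they expand to a separate multiply followed by an add or subtract.
+*/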
+#if CPU_FMA3_SUPPORT
+ #define CPU_FMADD_SS(f0,f1,t0) _mm_fmadd_ss(f0,f1,t0)
+ #define CPU_FMADD_PS(f0,f1,t0) _mm_fmadd_ps(f0,f1,t0)
+ #define CPU_FMADD_SD(f0,f1,t0) _mm_fmadd_sd(f0,f1,t0)
+ #define CPU_FMADD_PD(f0,f1,t0) _mm_fmadd_pd(f0,f1,t0)
+ #define CPU_FMSUB_SS(f0,f1,t0) _mm_fmsub_ss(f0,f1,t0)
+ #define CPU_FMSUB_PS(f0,f1,t0) _mm_fmsub_ps(f0,f1,t0)
+ #define CPU_FMSUB_SD(f0,f1,t0) _mm_fmsub_sd(f0,f1,t0)
+ #define CPU_FMSUB_PD(f0,f1,t0) _mm_fmsub_pd(f0,f1,t0)
+ #define CPU_FMADD256_SS(f0,f1,t0) _mm256_fmadd_ss(f0,f1,t0)
+ #define CPU_FMADD256_PS(f0,f1,t0) _mm256_fmadd_ps(f0,f1,t0)
+ #define CPU_FMADD256_SD(f0,f1,t0) _mm256_fmadd_sd(f0,f1,t0)
+ #define CPU_FMADD256_PD(f0,f1,t0) _mm256_fmadd_pd(f0,f1,t0)
+ #define CPU_FMSUB256_SS(f0,f1,t0) _mm256_fmsub_ss(f0,f1,t0)
+ #define CPU_FMSUB256_PS(f0,f1,t0) _mm256_fmsub_ps(f0,f1,t0)
+ #define CPU_FMSUB256_SD(f0,f1,t0) _mm256_fmsub_sd(f0,f1,t0)
+ #define CPU_FMSUB256_PD(f0,f1,t0) _mm256_fmsub_pd(f0,f1,t0)
+#elif CPU_FMA4_SUPPORT
+ #define CPU_FMADD_SS(f0,f1,t0) _mm_macc_ss(f0,f1,t0)
+ #define CPU_FMADD_PS(f0,f1,t0) _mm_macc_ps(f0,f1,t0)
+ #define CPU_FMADD_SD(f0,f1,t0) _mm_macc_sd(f0,f1,t0)
+ #define CPU_FMADD_PD(f0,f1,t0) _mm_macc_pd(f0,f1,t0)
+ #define CPU_FMSUB_SS(f0,f1,t0) _mm_msub_ss(f0,f1,t0)
+ #define CPU_FMSUB_PS(f0,f1,t0) _mm_msub_ps(f0,f1,t0)
+ #define CPU_FMSUB_SD(f0,f1,t0) _mm_msub_sd(f0,f1,t0)
+ #define CPU_FMSUB_PD(f0,f1,t0) _mm_msub_pd(f0,f1,t0)
+ #define CPU_FMADD256_SS(f0,f1,t0) _mm256_macc_ss(f0,f1,t0)
+ #define CPU_FMADD256_PS(f0,f1,t0) _mm256_macc_ps(f0,f1,t0)
+ #define CPU_FMADD256_SD(f0,f1,t0) _mm256_macc_sd(f0,f1,t0)
+ #define CPU_FMADD256_PD(f0,f1,t0) _mm256_macc_pd(f0,f1,t0)
+ #define CPU_FMSUB256_SS(f0,f1,t0) _mm256_msub_ss(f0,f1,t0)
+ #define CPU_FMSUB256_PS(f0,f1,t0) _mm256_msub_ps(f0,f1,t0)
+ #define CPU_FMSUB256_SD(f0,f1,t0) _mm256_msub_sd(f0,f1,t0)
+ #define CPU_FMSUB256_PD(f0,f1,t0) _mm256_msub_pd(f0,f1,t0)
+#else
+ #define CPU_FMADD_SS(f0,f1,t0) _mm_add_ss(_mm_mul_ss(f0,f1),t0)
+ #define CPU_FMADD_PS(f0,f1,t0) _mm_add_ps(_mm_mul_ps(f0,f1),t0)
+ #define CPU_FMADD_SD(f0,f1,t0) _mm_add_sd(_mm_mul_sd(f0,f1),t0)
+ #define CPU_FMADD_PD(f0,f1,t0) _mm_add_pd(_mm_mul_pd(f0,f1),t0)
+ #define CPU_FMSUB_SS(f0,f1,t0) _mm_sub_ss(_mm_mul_ss(f0,f1),t0)
+ #define CPU_FMSUB_PS(f0,f1,t0) _mm_sub_ps(_mm_mul_ps(f0,f1),t0)
+ #define CPU_FMSUB_SD(f0,f1,t0) _mm_sub_sd(_mm_mul_sd(f0,f1),t0)
+ #define CPU_FMSUB_PD(f0,f1,t0) _mm_sub_pd(_mm_mul_pd(f0,f1),t0)
+ #define CPU_FMADD256_SS(f0,f1,t0) _mm256_add_ss(_mm256_mul_ss(f0,f1),t0)
+ #define CPU_FMADD256_PS(f0,f1,t0) _mm256_add_ps(_mm256_mul_ps(f0,f1),t0)
+ #define CPU_FMADD256_SD(f0,f1,t0) _mm256_add_sd(_mm256_mul_sd(f0,f1),t0)
+ #define CPU_FMADD256_PD(f0,f1,t0) _mm256_add_pd(_mm256_mul_pd(f0,f1),t0)
+ #define CPU_FMSUB256_SS(f0,f1,t0) _mm256_sub_ss(_mm256_mul_ss(f0,f1),t0)
+ #define CPU_FMSUB256_PS(f0,f1,t0) _mm256_sub_ps(_mm256_mul_ps(f0,f1),t0)
+ #define CPU_FMSUB256_SD(f0,f1,t0) _mm256_sub_sd(_mm256_mul_sd(f0,f1),t0)
+ #define CPU_FMSUB256_PD(f0,f1,t0) _mm256_sub_pd(_mm256_mul_pd(f0,f1),t0)
+#endif
+
+
+////
+
+
+#if CPU_SSE_SUPPORT
+
+extern const uint32_t simd4fSignMask[4];
+extern const uint32_t simd4fSignMaskInv[4];
+extern const float simd4fHalf[4];
+extern const float simd4fOne[4];
+extern const float simd4fTwo[4];
+extern const float simd4fThree[4];
+extern const uint32_t simd4uOne[4];
+extern const uint32_t simd4uOneInv[4];
+extern const uint32_t simd4uTwo[4];
+extern const uint32_t simd4uFour[4];
+extern const float simd4fQuarter[4];
+extern const float simd4fPi[4];
+extern const float simd4fZeroOneTwoThree[4];
+extern const uint32_t simd4fAlphaMask[4];
+extern const float simd4f255[4];
+extern const float simd4f255Inv[4];
+
+#endif
+
+
+#if CPU_SSE2_SUPPORT
+
+/* Input range between -8192 and 8192 */
+__m128 simd4f_sin_ps( __m128 x );
+__m128 simd4f_cos_ps( __m128 x );
+void simd4f_sincos_ps( __m128 x, __m128 *s, __m128 *c );
+
+#endif
+
+#if CPU_SSE2_SUPPORT
+
+__m128 simd4f_exp2_ps( __m128 x );
+__m128 simd4f_log2_ps( __m128 x );
+__m128 simd4f_pow_ps( __m128 x, __m128 y );
+
+#endif
+
+#if CPU_SSE2_SUPPORT
+
+__m128 simd4f_pow12d5_ps( __m128 arg );
+__m128 simd4f_pow5d12_ps( __m128 arg );
+
+#endif
+
+
+////
+
+
+#if CPU_SSE2_SUPPORT
+
+/* Inlining qualifier for the two helpers below; empty unless the compiler is GCC-compatible or ICC. */
+#ifndef CC_ALWAYSINLINE
+ #if defined(__GNUC__) || defined(__INTEL_COMPILER)
+ #define CC_ALWAYSINLINE __attribute__((always_inline))
+ #else
+ #define CC_ALWAYSINLINE
+ #endif
+#endif
+
+/*
+ * NOTE(review): judging by the name this approximates vx^(12/5) per lane - confirm
+ * accuracy and valid input range against the upstream library.
+ */
+static inline CC_ALWAYSINLINE __m128 simd4f_pow12d5_inline_ps( __m128 vx )
+{
+ __m128 vpow, vpwsqrtinv, vpwsqrt, vx2;
+ vx2 = _mm_mul_ps( vx, vx );
+ /* scale vx, read its bit pattern as an integer, multiply that integer by 0.8, reinterpret the result as float bits */
+ vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, _mm_set1_ps( 5417434112.0f ) ) ) ), _mm_set1_ps( 0.8f ) ) ) );
+ /* vpwsqrt = vpow * rsqrt(vpow), i.e. an estimated square root of vpow */
+ vpwsqrtinv = _mm_rsqrt_ps( vpow );
+ vpwsqrt = _mm_mul_ps( vpow, vpwsqrtinv );
+ return _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx2, vpwsqrt ), _mm_mul_ps( _mm_mul_ps( _mm_mul_ps( vx2, vx ), vpwsqrtinv ), _mm_rsqrt_ps( vpwsqrt ) ) ), _mm_set1_ps( 0.51011878327f ) );
+}
+
+/*
+ * NOTE(review): judging by the name this approximates vx^(5/12) per lane - confirm
+ * accuracy and valid input range against the upstream library.
+ */
+static inline CC_ALWAYSINLINE __m128 simd4f_pow5d12_inline_ps( __m128 vx )
+{
+ __m128 vpow;
+ /* same bit-pattern trick as above, with a different scale and a 2/3 factor */
+ vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, _mm_set1_ps( 6521909350804488192.0f ) ) ) ), _mm_set1_ps( 0.666666666666f ) ) ) );
+ vx = _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx, vpow ), _mm_mul_ps( _mm_mul_ps( vx, vx ), _mm_rsqrt_ps( vpow ) ) ), _mm_set1_ps( 0.5290553722f ) );
+ /* two successive square roots = fourth root; the disabled branch uses the x * rsqrt(x) estimate instead of _mm_sqrt_ps */
+#if 0
+ vx = _mm_mul_ps( vx, _mm_rsqrt_ps( vx ) );
+ vx = _mm_mul_ps( vx, _mm_rsqrt_ps( vx ) );
+#else
+ vx = _mm_sqrt_ps( vx );
+ vx = _mm_sqrt_ps( vx );
+#endif
+ return vx;
+}
+
+#endif
+
+
+////
+
+
+#if CPU_SSE_SUPPORT
+
+/*
+ * Debug printers: each one stores the register into an aligned local array and
+ * prints the label `str` followed by every lane on one line of stdout.
+ * The label is never modified, so it is taken as const char *.
+ */
+
+/* Print the four float lanes of v. */
+static inline void simdPrintDebugSSE4f( const char *str, __m128 v )
+{
+ float CPU_ALIGN16 store[4];
+ _mm_store_ps( store, v );
+ printf( "%s %f %f %f %f\n", str, (double)store[0], (double)store[1], (double)store[2], (double)store[3] );
+}
+
+/* Print the two double lanes of v. */
+static inline void simdPrintDebugSSE2d( const char *str, __m128d v )
+{
+ double CPU_ALIGN16 store[2];
+ _mm_store_pd( store, v );
+ printf( "%s %f %f\n", str, store[0], store[1] );
+}
+
+/* Print v as sixteen unsigned 8-bit lanes (each promotes to int, so %d is correct). */
+static inline void simdPrintDebugSSE16u8( const char *str, __m128i v )
+{
+ uint8_t CPU_ALIGN16 store[16];
+ _mm_store_si128( (__m128i *)store, v );
+ printf( "%s %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", str, store[0], store[1], store[2], store[3], store[4], store[5], store[6], store[7], store[8], store[9], store[10], store[11], store[12], store[13], store[14], store[15] );
+}
+
+/* Print v as eight unsigned 16-bit lanes (each promotes to int, so %d is correct). */
+static inline void simdPrintDebugSSE8u16( const char *str, __m128i v )
+{
+ uint16_t CPU_ALIGN16 store[8];
+ _mm_store_si128( (__m128i *)store, v );
+ printf( "%s %d %d %d %d %d %d %d %d\n", str, store[0], store[1], store[2], store[3], store[4], store[5], store[6], store[7] );
+}
+
+/* Print v as four unsigned 32-bit lanes; %u keeps values above INT_MAX from printing as negative. */
+static inline void simdPrintDebugSSE4u32( const char *str, __m128i v )
+{
+ uint32_t CPU_ALIGN16 store[4];
+ _mm_store_si128( (__m128i *)store, v );
+ printf( "%s %u %u %u %u\n", str, (unsigned int)store[0], (unsigned int)store[1], (unsigned int)store[2], (unsigned int)store[3] );
+}
+
+/* Print v as two unsigned 64-bit lanes; %llu keeps values with the top bit set from printing as negative. */
+static inline void simdPrintDebugSSE2u64( const char *str, __m128i v )
+{
+ uint64_t CPU_ALIGN16 store[2];
+ _mm_store_si128( (__m128i *)store, v );
+ printf( "%s %llu %llu\n", str, (unsigned long long)store[0], (unsigned long long)store[1] );
+}
+
+#endif
+
+
+////
+
+
+#endif
+
diff --git a/lib/graphics_utils/mipmap/img.h b/lib/graphics_utils/mipmap/img.h
new file mode 100644
index 000000000..17044a517
--- /dev/null
+++ b/lib/graphics_utils/mipmap/img.h
@@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2007-2017 Alexis Naveros.
+ * Portions developed under contract to the SURVICE Engineering Company.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * -----------------------------------------------------------------------------
+ */
+
+#ifndef IMG_H
+#define IMG_H
+
+
+typedef struct /* image layout descriptor; units are not stated in this header - presumably pixels for width/height and bytes for the bytesper* fields, confirm */
+{
+ int width;
+ int height;
+ int type; /* presumably one of the IMG_FORMAT_TYPE_* values below - confirm */
+ int bytesperpixel;
+ int bytesperline;
+} imgFormat;
+
+enum /* pixel layout identifiers; IMG_FORMAT_TYPE_ANY is 0 and the rest count up from it */
+{
+ IMG_FORMAT_TYPE_ANY,
+ IMG_FORMAT_TYPE_RGB24,
+ IMG_FORMAT_TYPE_BGR24,
+ IMG_FORMAT_TYPE_RGBX32,
+ IMG_FORMAT_TYPE_BGRX32,
+ IMG_FORMAT_TYPE_RGBA32,
+ IMG_FORMAT_TYPE_BGRA32,
+ IMG_FORMAT_TYPE_GRAYSCALE,
+ IMG_FORMAT_TYPE_GRAYALPHA
+};
+
+typedef struct /* a format descriptor paired with an untyped data pointer */
+{
+ imgFormat format;
+ void *data; /* NOTE(review): ownership is not visible here; imgFree() presumably releases it - confirm */
+} imgImage;
+
+
+////
+
+
+void imgCopyRect( imgImage *image, int dstx, int dsty, int srcx, int srcy, int sizex, int sizey );
+
+void (*imgBlendGetFunction( imgImage *dstimage, imgImage *srcimage ))( imgImage *dstimage, int dstx, int dsty, imgImage *srcimage );
+int imgBlendImage( imgImage *dstimage, int dstx, int dsty, imgImage *srcimage );
+
+void imgAllocCopy( imgImage *dst, imgImage *src );
+void imgAllocCopyExtendBorder( imgImage *dstimage, imgImage *srcimage, int extendsize );
+void imgAllocExtractChannel( imgImage *dst, imgImage *src, int channelindex );
+void imgAllocExtractChannelExtendBorder( imgImage *dstimage, imgImage *srcimage, int channelindex, int extendsize );
+void imgAllocCopyChannelToAlpha( imgImage *dstimage, imgImage *srcimage, int channelindex, unsigned char r, unsigned char g, unsigned char b );
+void imgAllocAdjustBrightnessContrast( imgImage *dstimage, imgImage *srcimage, float brightness, float contrast );
+
+void imgFree( imgImage *image );
+
+
+#endif
+
diff --git a/lib/graphics_utils/mipmap/imgresize.c b/lib/graphics_utils/mipmap/imgresize.c
new file mode 100644
index 000000000..b376bc46d
--- /dev/null
+++ b/lib/graphics_utils/mipmap/imgresize.c
@@ -0,0 +1,4098 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2014-2017 Alexis Naveros.
+ * Portions developed under contract to the SURVICE Engineering Company.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * -----------------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+
+#include "cpusimd.h"
+
+#include "img.h"
+#include "imgresize.h"
+
+
+////
+
+
+/* Compile-time diagnostics: a non-zero IM_RESIZE_DEBUG enables the printf() dumps guarded by '#if IM_RESIZE_DEBUG' in this file. */
+#define IM_RESIZE_DEBUG (0)
+/* Progress-reporting switch; its consumers are not in this part of the file. */
+#define IM_RESIZE_DEBUG_PROGRESS (0)
+
+
+////
+
+
+/* Fallback definition for environments whose headers do not provide M_PI. */
+#ifndef M_PI
+ #define M_PI (3.14159265358979323846)
+#endif
+
+/* Byte-wise pointer arithmetic: yields p advanced by o bytes, as a void pointer. */
+#ifndef ADDRESS
+ #define ADDRESS(p,o) ((void *)(((char *)p)+(o)))
+#endif
+
+/*
+ * Compiler-specific spellings for forced inlining and the 'restrict' qualifier.
+ * On GCC-compatible compilers 'restrict' is only used when compiling as C99 or
+ * later; unknown compilers get empty definitions.
+ */
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+ #define CC_ALWAYSINLINE __attribute__((always_inline))
+ #if __STDC_VERSION__ >= 199901L
+ #define CC_RESTRICT restrict
+ #else
+ #define CC_RESTRICT
+ #endif
+#elif defined(_MSC_VER)
+ #define CC_ALWAYSINLINE __forceinline
+ #define CC_RESTRICT __restrict
+#else
+ #define CC_ALWAYSINLINE
+ #define CC_RESTRICT
+#endif
+
+/* Returns 1 when v has at most one bit set (so v == 0 also yields 1), otherwise 0. */
+static inline CC_ALWAYSINLINE uint32_t ccIsPow2Int32( uint32_t v )
+{
+  /* Clearing the lowest set bit leaves nothing behind only for 0 and powers of two */
+  const uint32_t remainder = v & ( v - 1 );
+  return ( remainder == 0 ) ? 1 : 0;
+}
+
+/* Rounds to the nearest int by adding 0.5f and truncating; as the name says, only meaningful for non-negative x. */
+#define ROUND_POSITIVE_FLOAT(x) ((int)((x)+0.5f))
+
+
+////
+
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__i386__) || defined(__i386) || defined(i386) || defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86) || defined(_X86_)
+
+/*
+ * Approximate sRGB -> linear conversion (x86 build).
+ * Input is 0.0,255.0, output is 0.0,1.0
+ * Values at or below 0.04045*255 use the linear segment (divide by 12.92);
+ * larger values go through a pow()-free approximation built from float bit
+ * manipulation and square roots.
+ */
+static inline CC_ALWAYSINLINE float srgb2linear( float v )
+{
+ float v2, vpow, vpwsqrt;
+ /* Used to reinterpret the bits of a float as a 32-bit integer and back */
+ union
+ {
+ int32_t i;
+ float f;
+ } u;
+ if( v <= (0.04045f*255.0f) )
+ v = v * ( (1.0f/12.92f)*(1.0f/255.0f) );
+ else
+ {
+ /* Normalize to ( srgb + 0.055 ) / 1.055 */
+ v = ( v + (0.055f*255.0f) ) * ( (1.0f/1.055f)*(1.0f/255.0f) );
+ v2 = v * v;
+ /* Scaling the integer bit pattern by 0.8 appears to approximate raising the pre-scaled value to the power 0.8 -- TODO confirm the derivation of the magic constant */
+ u.f = v * 5417434112.0f;
+ u.i = (int32_t)ROUND_POSITIVE_FLOAT( (float)u.i * 0.8f );
+ vpow = u.f;
+ vpwsqrt = sqrtf( vpow );
+ /* Blend two estimates built from v^2, v^3 and roots of vpow; presumably both target v^2.4 */
+ v = ( ( v2 * vpwsqrt ) + ( ( ( v2 * v ) / vpwsqrt ) / sqrtf( vpwsqrt ) ) ) * 0.51011878327f;
+ }
+ return v;
+}
+
+/*
+ * Approximate linear -> sRGB conversion (x86 build).
+ * Input is 0.0,1.0, output is 0.0,255.0
+ * Values at or below 0.0031308 use the linear segment (multiply by 12.92);
+ * larger values go through a pow()-free approximation built from float bit
+ * manipulation and square roots.
+ */
+static inline CC_ALWAYSINLINE float linear2srgb( float v )
+{
+ float vpow;
+ /* Used to reinterpret the bits of a float as a 32-bit integer and back */
+ union
+ {
+ int32_t i;
+ float f;
+ } u;
+ if( v <= 0.0031308f )
+ v = v * (12.92f*255.0f);
+ else
+ {
+ /* Scaling the integer bit pattern by 2/3 appears to approximate a fractional power of the pre-scaled value -- TODO confirm the derivation of the magic constant */
+ u.f = ( v * 6521909350804488192.0f );
+ u.i = (int32_t)ROUND_POSITIVE_FLOAT( (float)u.i * 0.666666666666f );
+ vpow = u.f;
+ v = ( v * vpow ) + ( ( v * v ) / sqrtf( vpow ) );
+ /* Fourth root of the blended estimate, then the sRGB scale (1.055) and offset (0.055), both pre-multiplied by 255 */
+ v = ( (1.055f*255.0f) * sqrtf( sqrtf( v * 0.5290553722f ) ) - (0.055f*255.0f) );
+ }
+ return v;
+}
+
+#else
+
+/*
+ * Reference sRGB -> linear conversion for non-x86 builds, using powf().
+ * Input is 0.0,255.0, output is 0.0,1.0
+ * Much slower than the approximation used on x86.
+ */
+static inline CC_ALWAYSINLINE float srgb2linear( float v )
+{
+  const float unit = v * (1.0f/255.0f);
+  /* Linear segment near black, power curve with exponent 2.4 everywhere else */
+  return ( unit <= 0.04045f )
+    ? (float)( unit * (1.0f/12.92) )
+    : powf( ( unit + 0.055f ) * (1.0f/1.055f), 2.4f );
+}
+
+/*
+ * Reference linear -> sRGB conversion for non-x86 builds, using powf().
+ * Input is 0.0,1.0, output is 0.0,255.0
+ * Much slower than the approximation used on x86.
+ */
+static inline CC_ALWAYSINLINE float linear2srgb( float v )
+{
+  float encoded;
+  /* Linear segment near black, power curve with exponent 1/2.4 everywhere else */
+  encoded = ( v <= 0.0031308f ) ? ( v * 12.92f ) : ( 1.055f * powf( v, 1.0f/2.4f ) - 0.055f );
+  return 255.0f * encoded;
+}
+
+#endif
+
+
+////
+
+
+#if CPU_SSE2_SUPPORT
+
+/*
+ * Constant vectors for the SSE sRGB conversions below; they carry the same
+ * numbers as the scalar srgb2linear()/linear2srgb() approximations.
+ * Tables 00/01 and 07/08 hold 1024.0f / 1.0f in their fourth lane: the
+ * threshold compare is then true for any fourth-lane value up to 1024 and the
+ * linear branch multiplies by 1.0, which is how the "3" variants pass the
+ * alpha lane through unchanged. Tables 14/15 are the four-lane versions of
+ * 00/01, used by srgb2linear4().
+ */
+static const float CPU_ALIGN16 srgbLinearConst00[4] = { 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f, 1024.0f };
+static const float CPU_ALIGN16 srgbLinearConst01[4] = { (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), 1.0f };
+static const float CPU_ALIGN16 srgbLinearConst02[4] = { 0.055f*255.0f, 0.055f*255.0f, 0.055f*255.0f, 0.055f*255.0f };
+static const float CPU_ALIGN16 srgbLinearConst03[4] = { (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f) };
+static const float CPU_ALIGN16 srgbLinearConst04[4] = { 5417434112.0f, 5417434112.0f, 5417434112.0f, 5417434112.0f };
+static const float CPU_ALIGN16 srgbLinearConst05[4] = { 0.8f, 0.8f, 0.8f, 0.8f };
+static const float CPU_ALIGN16 srgbLinearConst06[4] = { 0.51011878327f, 0.51011878327f, 0.51011878327f, 0.51011878327f };
+static const float CPU_ALIGN16 srgbLinearConst07[4] = { 0.0031308f, 0.0031308f, 0.0031308f, 1024.0f };
+static const float CPU_ALIGN16 srgbLinearConst08[4] = { 12.92f*255.0f, 12.92f*255.0f, 12.92f*255.0f, 1.0f };
+static const float CPU_ALIGN16 srgbLinearConst09[4] = { 6521909350804488192.0f, 6521909350804488192.0f, 6521909350804488192.0f, 6521909350804488192.0f };
+static const float CPU_ALIGN16 srgbLinearConst10[4] = { 0.666666666666f, 0.666666666666f, 0.666666666666f, 0.666666666666f };
+static const float CPU_ALIGN16 srgbLinearConst11[4] = { 0.5290553722f, 0.5290553722f, 0.5290553722f, 0.5290553722f };
+static const float CPU_ALIGN16 srgbLinearConst12[4] = { 1.055f*255.0f, 1.055f*255.0f, 1.055f*255.0f, 1.055f*255.0f };
+static const float CPU_ALIGN16 srgbLinearConst13[4] = { -0.055f*255.0f, -0.055f*255.0f, -0.055f*255.0f, -0.055f*255.0f };
+static const float CPU_ALIGN16 srgbLinearConst14[4] = { 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f };
+static const float CPU_ALIGN16 srgbLinearConst15[4] = { (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f) };
+
+/*
+ * SSE version of srgb2linear() for three color lanes plus alpha.
+ * Input is 0.0,255.0 ~ output is 0.0,1.0 ~ alpha channel is passed as-is
+ * Both the linear segment (vbase) and the power approximation (vx) are
+ * computed for every lane; the threshold mask then selects per lane.
+ */
+static inline CC_ALWAYSINLINE __m128 srgb2linear3( __m128 vx )
+{
+ __m128 vmask, vbase;
+ __m128 vpow, vpwsqrtinv, vpwsqrt, vx2;
+ /* Lanes at or below the threshold take the linear segment; the fourth lane's threshold is 1024 and its scale 1.0 */
+ vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst00 );
+ vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst01 );
+ vx = _mm_mul_ps( _mm_add_ps( vx, *(__m128*)srgbLinearConst02 ), *(__m128*)srgbLinearConst03 );
+ vx2 = _mm_mul_ps( vx, vx );
+ /* Same float-bits-as-integer scaling by 0.8 as the scalar srgb2linear() */
+ vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst04 ) ) ), *(__m128*)srgbLinearConst05 ) ) );
+ /* Square root obtained as vpow * rsqrt( vpow ), using the approximate reciprocal square root */
+ vpwsqrtinv = _mm_rsqrt_ps( vpow );
+ vpwsqrt = _mm_mul_ps( vpow, vpwsqrtinv );
+ vx = _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx2, vpwsqrt ), _mm_mul_ps( _mm_mul_ps( _mm_mul_ps( vx2, vx ), vpwsqrtinv ), _mm_rsqrt_ps( vpwsqrt ) ) ), *(__m128*)srgbLinearConst06 );
+ return CPU_BLENDV_PS( vx, vbase, vmask );
+}
+
+/*
+ * SSE version of linear2srgb() for three color lanes plus alpha.
+ * Input is 0.0,1.0 ~ output is 0.0,255.0 ~ alpha channel is passed as-is
+ * Both the linear segment (vbase) and the power approximation (vx) are
+ * computed for every lane; the threshold mask then selects per lane.
+ */
+static inline CC_ALWAYSINLINE __m128 linear2srgb3( __m128 vx )
+{
+ __m128 vmask, vbase, vpow;
+ /* Lanes at or below the threshold take the linear segment; the fourth lane's threshold is 1024 and its scale 1.0 */
+ vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst07 );
+ vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst08 );
+ /* Same float-bits-as-integer scaling by 2/3 as the scalar linear2srgb() */
+ vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst09 ) ) ), *(__m128*)srgbLinearConst10 ) ) );
+ /* Blend of two estimates, fourth root via two square roots, then the sRGB scale and offset */
+ vx = _mm_add_ps( _mm_mul_ps( _mm_sqrt_ps( _mm_sqrt_ps( _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx, vpow ), _mm_mul_ps( _mm_mul_ps( vx, vx ), _mm_rsqrt_ps( vpow ) ) ), *(__m128*)srgbLinearConst11 ) ) ), *(__m128*)srgbLinearConst12 ), *(__m128*)srgbLinearConst13 );
+ return CPU_BLENDV_PS( vx, vbase, vmask );
+}
+
+/*
+ * SSE version of srgb2linear() that converts all four lanes.
+ * Input is 0.0,255.0 ~ output is 0.0,1.0
+ * NOTE(review): the original comment here claimed the alpha channel is passed
+ * as-is, but this variant uses srgbLinearConst14/15, whose fourth lane holds
+ * the same threshold and scale as the color lanes, so the fourth lane is
+ * converted like the others.
+ */
+static inline CC_ALWAYSINLINE __m128 srgb2linear4( __m128 vx )
+{
+ __m128 vmask, vbase;
+ __m128 vpow, vpwsqrtinv, vpwsqrt, vx2;
+ /* Lanes at or below the threshold take the linear segment */
+ vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst14 );
+ vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst15 );
+ vx = _mm_mul_ps( _mm_add_ps( vx, *(__m128*)srgbLinearConst02 ), *(__m128*)srgbLinearConst03 );
+ vx2 = _mm_mul_ps( vx, vx );
+ /* Same float-bits-as-integer scaling by 0.8 as the scalar srgb2linear() */
+ vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst04 ) ) ), *(__m128*)srgbLinearConst05 ) ) );
+ /* Square root obtained as vpow * rsqrt( vpow ), using the approximate reciprocal square root */
+ vpwsqrtinv = _mm_rsqrt_ps( vpow );
+ vpwsqrt = _mm_mul_ps( vpow, vpwsqrtinv );
+ vx = _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx2, vpwsqrt ), _mm_mul_ps( _mm_mul_ps( _mm_mul_ps( vx2, vx ), vpwsqrtinv ), _mm_rsqrt_ps( vpwsqrt ) ) ), *(__m128*)srgbLinearConst06 );
+ return CPU_BLENDV_PS( vx, vbase, vmask );
+}
+
+#endif
+
+
+////
+
+
+/*
+ * Truncated power series 1 + sum_{k=1..14} ( (x*x/4)^k / (k!)^2 ).
+ * Every term is positive, so this is the series of the zeroth-order
+ * *modified* Bessel function of the first kind (I0), as used by Kaiser
+ * windows, cut off after the k = 14 term.
+ */
+static inline CC_ALWAYSINLINE double bessel( double x )
+{
+  int k;
+  double total, term, quarter;
+
+  quarter = x * x * 0.25;
+  /* k = 1 term */
+  term = quarter;
+  total = 1.0 + term;
+  /* Each following term is the previous one times (x*x/4) / (k*k) */
+  for( k = 2 ; k <= 14 ; k++ )
+  {
+    const double kd = (double)k;
+    term *= quarter * ( 1.0 / ( kd * kd ) );
+    total += term;
+  }
+
+  return total;
+}
+
+/*
+ * Kaiser window numerator: bessel( beta * sqrt( 1 - x*x ) ), with the radicand
+ * clamped at zero so positions with |x| > 1 evaluate as bessel( 0 ).
+ * The result is not divided by bessel( beta ) here.
+ */
+static inline CC_ALWAYSINLINE double kaiser( double x, double beta )
+{
+  const double radicand = fmax( 0.0, 1.0 - ( x * x ) );
+  const double argument = beta * sqrt( radicand );
+  return bessel( argument );
+}
+
+/* Normalized sinc: sin( pi*x ) / ( pi*x ), defined as 1.0 at x == 0. */
+static inline CC_ALWAYSINLINE double sinc( double x )
+{
+  const double phase = x * M_PI;
+  /* The x == 0 case is answered directly so the 0/0 quotient is never evaluated */
+  return ( x == 0.0 ) ? 1.0 : ( sin( phase ) / phase );
+}
+
+
+////
+
+
+#if CPU_SSE2_SUPPORT
+
+/*
+ * Four-lane single-precision counterpart of bessel(): evaluates
+ * 1 + sum_{k=1..14} ( (x*x/4)^k / (k!)^2 ) independently in each lane.
+ * Each step multiplies the previous term by y / (k*k) and accumulates it.
+ */
+static inline CC_ALWAYSINLINE __m128 simd4f_bessel( __m128 x )
+{
+ __m128 sum, t, y;
+
+ sum = *(__m128 *)simd4fOne;
+ /* y = x*x/4 ; the first term of the series is y itself */
+ y = _mm_mul_ps( *(__m128 *)simd4fQuarter, _mm_mul_ps( x, x ) );
+ t = y;
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(2.0f*2.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(3.0f*3.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(4.0f*4.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(5.0f*5.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(6.0f*6.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(7.0f*7.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(8.0f*8.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(9.0f*9.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(10.0f*10.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(11.0f*11.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(12.0f*12.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(13.0f*13.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+ t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(14.0f*14.0f) ) ) );
+ sum = _mm_add_ps( sum, t );
+
+ return sum;
+}
+
+/*
+ * Four-lane counterpart of kaiser(): simd4f_bessel( beta * sqrt( max( 0, 1 - x*x ) ) )
+ * per lane. As in the scalar version, the result is not divided by bessel( beta ).
+ */
+static inline CC_ALWAYSINLINE __m128 simd4f_kaiser( __m128 x, __m128 beta )
+{
+ return simd4f_bessel( _mm_mul_ps( beta, _mm_sqrt_ps( _mm_max_ps( _mm_setzero_ps(), _mm_sub_ps( *(__m128 *)simd4fOne, _mm_mul_ps( x, x ) ) ) ) ) );
+}
+
+/* Four-lane counterpart of sinc(): sin( pi*x ) / ( pi*x ) per lane, with lanes where x == 0 forced to 1. */
+static inline CC_ALWAYSINLINE __m128 simd4f_sinc( __m128 x )
+{
+ __m128 zeromask;
+ /* Remember which lanes are exactly zero before x is overwritten */
+ zeromask = _mm_cmpeq_ps( x, _mm_setzero_ps() );
+ x = _mm_mul_ps( x, _mm_load_ps( simd4fPi ) );
+ /* Zero lanes compute 0/0 here; the blend below replaces them with 1 */
+ x = _mm_div_ps( simd4f_sin_ps( x ), x );
+ x = CPU_BLENDV_PS( x, *(__m128 *)simd4fOne, zeromask );
+ return x;
+}
+
+#endif
+
+
+////
+
+
+/*
+ * State for the "static" resize path: one precomputed square weight matrix,
+ * built by imBuildStaticMatrix(), plus the source image description read by
+ * the imStaticKernel* functions.
+ */
+typedef struct
+{
+ int matrixsize; /* number of taps per axis ( maxx - minx + 1 ) */
+ int matrixoffset; /* source offset of the first tap ( minx ) */
+ int matrixrowwidth; /* matrixsize rounded up to a multiple of 4 floats */
+ int matrixrowsize; /* matrixrowwidth in bytes; the kernels step rows by this */
+ int rowreturn; /* matrixrowwidth - matrixsize, i.e. padding floats per row */
+ float *matrix; /* 16-byte aligned weights inside 'alloc', zero padded per row */
+
+ /* Options set outside imBuildStaticMatrix(); only minimumalphaf is read by the kernels visible here */
+ int minimumalpha;
+ float dithersum;
+ float minimumalphaf; /* alpha-normalized kernels emit a zero pixel below this weighted alpha sum */
+ float amplifynormal;
+ float normalsustainfactor;
+ void *alloc; /* raw malloc() block backing 'matrix'; released by imFreeStaticState() */
+
+ unsigned char *srcdata; /* source pixels */
+ /* Wrap limits for the byte index within a row in the 1/2/3/4-channel kernels; presumably width * channels -- TODO confirm */
+ int width1;
+ int width2;
+ int width3;
+ int width4;
+ int height; /* row index at which the kernels wrap back to row 0 */
+ int bytesperline; /* byte stride between source rows */
+} imStaticMatrixState;
+
+
+/*
+ * Builds the normalized square filter matrix for a fixed integer reduction.
+ *
+ * A 1D sinc * Kaiser window profile is sampled at every source tap covered by
+ * the filter footprint; the 2D matrix is the outer product of that profile
+ * with itself, scaled so that all weights sum to 1. Rows are padded with
+ * zeros to a multiple of 4 floats and the matrix is 16-byte aligned.
+ *
+ * state        receives matrixoffset, matrixsize, matrixrowwidth, rowreturn,
+ *              matrixrowsize, alloc and matrix
+ * sizedivisor  integer reduction factor
+ * hopcount     filter extent; the footprint spans hopcount * sizedivisor / 2
+ *              source samples on each side of the sample center
+ * alpha        Kaiser window parameter, clamped to 16; beta = alpha * pi
+ *
+ * Returns 1 on success. Returns 0 if memory could not be allocated, in which
+ * case state->alloc and state->matrix are null so that imFreeStaticState()
+ * remains safe to call.
+ */
+static int imBuildStaticMatrix( imStaticMatrixState * CC_RESTRICT state, int sizedivisor, float hopcount, float alpha )
+{
+  int i, j, minx, maxx;
+  double x, xshift, hopsize, offset, scalefactor, hopcountinv, beta, linsq, sum;
+  double *linear;
+  float suminv;
+  float *matrix;
+
+  if( alpha > 16.0f )
+    alpha = 16.0f;
+  beta = (double)alpha * (double)M_PI;
+  hopcountinv = 1.0 / (double)hopcount;
+
+  scalefactor = 1.0 / (double)sizedivisor;
+  hopsize = 0.5 * (double)sizedivisor;
+  /* Center of the destination sample, measured in source samples */
+  offset = hopsize - 0.5;
+  minx = (int)ceil( ( (double)-hopcount * hopsize ) + offset );
+  maxx = (int)floor( ( (double)hopcount * hopsize ) + offset );
+  state->matrixoffset = minx;
+  state->matrixsize = ( maxx - minx ) + 1;
+  /* Pad every row to a multiple of 4 floats for the SIMD kernels */
+  state->matrixrowwidth = ( state->matrixsize + 3 ) & ~3;
+  state->rowreturn = state->matrixrowwidth - state->matrixsize;
+  state->matrixrowsize = state->matrixrowwidth * sizeof(float);
+
+#if IM_RESIZE_DEBUG
+  printf( "ResizeMatrix ; scalefactor %.3f, offset %.3f, hopsize %.3f\n", scalefactor, offset, hopsize );
+#endif
+
+  linear = malloc( state->matrixrowwidth * sizeof(double) );
+  if( !linear )
+  {
+    state->alloc = 0;
+    state->matrix = 0;
+    return 0;
+  }
+  for( i = 0 ; i < state->matrixsize ; i++ )
+  {
+    x = (double)( i + state->matrixoffset );
+    xshift = 2.0 * scalefactor * ( x - offset );
+    linear[i] = sinc( xshift ) * kaiser( hopcountinv * xshift, beta );
+#if IM_RESIZE_DEBUG
+    printf( " x[%+.3f] = %+.3f ( %+.3f * %+.3f )\n", x, linear[i], sinc( xshift ), kaiser( hopcountinv * xshift, beta ) );
+#endif
+  }
+  for( ; i < state->matrixrowwidth ; i++ )
+    linear[i] = 0.0;
+
+  /* Build normalized state ; 16 spare bytes leave room to align the matrix */
+  state->alloc = malloc( ( state->matrixsize * state->matrixrowsize ) + 16 );
+  if( !state->alloc )
+  {
+    state->matrix = 0;
+    free( linear );
+    return 0;
+  }
+  state->matrix = (void *)( ( (uintptr_t)state->alloc + 0xf ) & ~(uintptr_t)0xf );
+  matrix = state->matrix;
+  sum = 0.0;
+  for( i = 0 ; i < state->matrixsize ; i++ )
+  {
+    for( j = 0 ; j < state->matrixsize ; j++ )
+    {
+      linsq = linear[i] * linear[j];
+      matrix[j] = (float)linsq;
+      sum += linsq;
+    }
+    for( ; j < state->matrixrowwidth ; j++ )
+      matrix[j] = 0.0f;
+    matrix += state->matrixrowwidth;
+  }
+  free( linear );
+
+#if IM_RESIZE_DEBUG
+  printf( "Matrix sum : %f\n", sum );
+#endif
+
+  /* Normalize so the weights, padding included, sum to 1 */
+  suminv = (float)( 1.0 / sum );
+  j = state->matrixsize * state->matrixrowwidth;
+  for( i = 0 ; i < j ; i++ )
+    state->matrix[i] *= suminv;
+
+#if IM_RESIZE_DEBUG
+  printf( "Matrix %dx%d :\n", state->matrixsize, state->matrixsize );
+  for( i = 0 ; i < state->matrixsize ; i++ )
+  {
+    for( j = 0 ; j < state->matrixsize ; j++ )
+      printf( " %+.6f", state->matrix[ ( i * state->matrixrowwidth ) + j ] );
+    printf( "\n" );
+  }
+  printf( "Matrix Offset : %d\n", state->matrixoffset );
+  printf( "Matrix Size : %d\n", state->matrixsize );
+  printf( "Matrix Rowwidth : %d\n", state->matrixrowwidth );
+#endif
+
+  return 1;
+}
+
+
+/* Releases the matrix storage owned by 'state' and clears the owning pointer. state->matrix is left untouched. */
+static void imFreeStaticState( imStaticMatrixState * CC_RESTRICT state )
+{
+  void *block = state->alloc;
+  state->alloc = 0;
+  free( block );
+}
+
+
+////
+
+
+/*
+ * State for the "generic" resize path: separate 1D weight profiles for the
+ * x and y axes, rebuilt by imBuildGenericLinearX()/imBuildGenericLinearY(),
+ * plus the source image description.
+ */
+typedef struct
+{
+ int matrixsizex, matrixsizey; /* number of taps per axis for the current sample */
+ int matrixoffsetx, matrixoffsety; /* first tap position, wrapped into the image */
+ float *linearx; /* 16-byte aligned x weights inside 'alloc' */
+ float *lineary; /* y weights, stored right after the x weights */
+ float beta; /* Kaiser parameter: clamped alpha * pi */
+ float hopcountinv; /* 1 / hopcount */
+
+ /* Options set outside the functions visible here; their use is not shown in this part of the file */
+ float dithersum;
+ int minimumalpha;
+ float minimumalphaf;
+ float amplifynormal;
+ float normalsustainfactor;
+ void *alloc; /* raw malloc() block backing linearx/lineary; released by imFreeGenericState() */
+
+ unsigned char *srcdata; /* source pixels */
+ /* Presumably width * channel count, as in imStaticMatrixState -- TODO confirm */
+ int width1;
+ int width2;
+ int width3;
+ int width4;
+ int height;
+ int bytesperline; /* presumably the byte stride between source rows -- TODO confirm */
+} imGenericMatrixState;
+
+
+/*
+ * Allocates the zero-filled, 16-byte aligned weight buffers of a generic
+ * resize state and stores the window parameters.
+ *
+ * state           receives alloc, linearx, lineary, beta and hopcountinv
+ * scalex, scaley  scale factor per axis; each buffer holds
+ *                 ceil( hopcount / scale ) + 2 floats, rounded up to a
+ *                 multiple of 4
+ * hopcount        filter extent
+ * alpha           Kaiser window parameter, clamped to 16; beta = alpha * pi
+ *
+ * Returns 1 on success. Returns 0 if memory could not be allocated, in which
+ * case alloc, linearx and lineary are null so that imFreeGenericState()
+ * remains safe to call.
+ */
+static inline int imAllocGenericState( imGenericMatrixState *state, float scalex, float scaley, float hopcount, float alpha )
+{
+  int allocx, allocy, size;
+  void *align;
+  if( alpha > 16.0f )
+    alpha = 16.0f;
+  allocx = ( (int)ceilf( hopcount / scalex ) + 2 + 3 ) & ~0x3;
+  allocy = ( (int)ceilf( hopcount / scaley ) + 2 + 3 ) & ~0x3;
+  /* 16 spare bytes leave room to align the first buffer */
+  size = ( ( allocx + allocy ) * sizeof(float) ) + 16;
+  state->alloc = malloc( size );
+  if( !state->alloc )
+  {
+    state->linearx = 0;
+    state->lineary = 0;
+    return 0;
+  }
+  memset( state->alloc, 0, size );
+  align = (void *)( ( (uintptr_t)state->alloc + 0xf ) & ~(uintptr_t)0xf );
+  state->linearx = align;
+  /* The y weights follow the x weights; allocx is a multiple of 4 floats, so lineary stays 16-byte aligned */
+  state->lineary = ADDRESS( align, allocx * sizeof(float) );
+  state->beta = alpha * (float)M_PI;
+  state->hopcountinv = 1.0f / (float)hopcount;
+  return 1;
+}
+
+/*
+ * Fills state->linearx with the horizontal sinc * Kaiser weights for a
+ * destination sample centered at source position 'sourcex'.
+ *
+ * state      uses linearx, hopcountinv and beta; receives matrixsizex and
+ *            matrixoffsetx
+ * scalex     horizontal scale factor
+ * scaleinvx  its inverse; the footprint spans hopcount * scaleinvx / 2
+ *            source samples on each side of sourcex
+ * sourcex    center of the sample in source coordinates
+ * hopcount   filter extent
+ * alpha      unused here; the window parameter comes from state->beta
+ * width      source width, used to wrap the first tap into the image;
+ *            must be positive
+ *
+ * The weights are not normalized by this function. On SSE2 builds they are
+ * written four at a time, so up to three floats past matrixsizex may be
+ * written as well.
+ */
+static inline void imBuildGenericLinearX( imGenericMatrixState *state, float scalex, float scaleinvx, float sourcex, float hopcount, float alpha, int width )
+{
+  int i, minx, maxx, wrapx;
+  float hopsizex, offsetx;
+  float *linearx;
+
+  hopsizex = 0.5f * scaleinvx;
+  offsetx = (float)sourcex;
+  minx = (int)ceil( ( -hopcount * hopsizex ) + offsetx );
+  maxx = (int)floor( ( hopcount * hopsizex ) + offsetx );
+  state->matrixsizex = ( maxx - minx ) + 1;
+  /* True modulo: stays within [0,width) however far minx lies left of the image, and cannot overflow for large widths */
+  wrapx = minx % width;
+  if( wrapx < 0 )
+    wrapx += width;
+  state->matrixoffsetx = wrapx;
+
+  linearx = state->linearx;
+  scalex *= 2.0f;
+#if CPU_SSE2_SUPPORT
+  for( i = 0 ; i < state->matrixsizex ; i += 4 )
+  {
+    __m128 vx, vxshift;
+    vx = _mm_add_ps( _mm_set1_ps( (float)( i + minx ) ), _mm_load_ps( simd4fZeroOneTwoThree ) );
+    vxshift = _mm_mul_ps( _mm_set1_ps( scalex ), _mm_sub_ps( vx, _mm_set1_ps( offsetx ) ) );
+    _mm_store_ps( &linearx[i], _mm_mul_ps( simd4f_sinc( vxshift ), simd4f_kaiser( _mm_mul_ps( _mm_set1_ps( state->hopcountinv ), vxshift ), _mm_set1_ps( state->beta ) ) ) );
+  #if IM_RESIZE_DEBUG
+    printf( " linearx[%d] = %.3f\n", i+minx+0, linearx[i+0] );
+    printf( " linearx[%d] = %.3f\n", i+minx+1, linearx[i+1] );
+    printf( " linearx[%d] = %.3f\n", i+minx+2, linearx[i+2] );
+    printf( " linearx[%d] = %.3f\n", i+minx+3, linearx[i+3] );
+  #endif
+  }
+#else
+  for( i = 0 ; i < state->matrixsizex ; i++ )
+  {
+    float x, xshift;
+    x = (float)( i + minx );
+    xshift = scalex * ( x - offsetx );
+    linearx[i] = (float)( sinc( xshift ) * kaiser( state->hopcountinv * xshift, state->beta ) );
+  #if IM_RESIZE_DEBUG
+    printf( " linearx[%+.3f] = %.3f ( %+.3f * %+.3f )\n", x, linearx[i], sinc( xshift ), kaiser( state->hopcountinv * xshift, state->beta ) );
+  #endif
+  }
+#endif
+
+  return;
+}
+
+/*
+ * Fills state->lineary with the vertical sinc * Kaiser weights for a
+ * destination sample centered at source position 'sourcey'.
+ *
+ * state      uses lineary, hopcountinv and beta; receives matrixsizey and
+ *            matrixoffsety
+ * scaley     vertical scale factor
+ * scaleinvy  its inverse; the footprint spans hopcount * scaleinvy / 2
+ *            source samples on each side of sourcey
+ * sourcey    center of the sample in source coordinates
+ * hopcount   filter extent
+ * alpha      unused here; the window parameter comes from state->beta
+ * height     source height, used to wrap the first tap into the image;
+ *            must be positive
+ *
+ * The weights are not normalized by this function. On SSE2 builds they are
+ * written four at a time, so up to three floats past matrixsizey may be
+ * written as well.
+ */
+static inline void imBuildGenericLinearY( imGenericMatrixState *state, float scaley, float scaleinvy, float sourcey, float hopcount, float alpha, int height )
+{
+  int i, miny, maxy, wrapy;
+  float hopsizey, offsety;
+  float *lineary;
+
+  hopsizey = 0.5f * scaleinvy;
+  offsety = (float)sourcey;
+  miny = (int)ceil( ( -hopcount * hopsizey ) + offsety );
+  maxy = (int)floor( ( hopcount * hopsizey ) + offsety );
+  state->matrixsizey = ( maxy - miny ) + 1;
+  /* True modulo: stays within [0,height) however far miny lies above the image, and cannot overflow for large heights */
+  wrapy = miny % height;
+  if( wrapy < 0 )
+    wrapy += height;
+  state->matrixoffsety = wrapy;
+
+  lineary = state->lineary;
+  scaley *= 2.0f;
+#if CPU_SSE2_SUPPORT
+  for( i = 0 ; i < state->matrixsizey ; i += 4 )
+  {
+    __m128 vy, vyshift;
+    vy = _mm_add_ps( _mm_set1_ps( (float)( i + miny ) ), _mm_load_ps( simd4fZeroOneTwoThree ) );
+    vyshift = _mm_mul_ps( _mm_set1_ps( scaley ), _mm_sub_ps( vy, _mm_set1_ps( offsety ) ) );
+    _mm_store_ps( &lineary[i], _mm_mul_ps( simd4f_sinc( vyshift ), simd4f_kaiser( _mm_mul_ps( _mm_set1_ps( state->hopcountinv ), vyshift ), _mm_set1_ps( state->beta ) ) ) );
+  #if IM_RESIZE_DEBUG
+    printf( " lineary[%d] = %.3f\n", i+miny+0, lineary[i+0] );
+    printf( " lineary[%d] = %.3f\n", i+miny+1, lineary[i+1] );
+    printf( " lineary[%d] = %.3f\n", i+miny+2, lineary[i+2] );
+    printf( " lineary[%d] = %.3f\n", i+miny+3, lineary[i+3] );
+  #endif
+  }
+#else
+  for( i = 0 ; i < state->matrixsizey ; i++ )
+  {
+    float y, yshift;
+    y = (float)( i + miny );
+    yshift = scaley * ( y - offsety );
+    lineary[i] = (float)( sinc( yshift ) * kaiser( state->hopcountinv * yshift, state->beta ) );
+  #if IM_RESIZE_DEBUG
+    printf( " lineary[%+.3f] = %.3f ( %+.3f * %+.3f )\n", y, lineary[i], sinc( yshift ), kaiser( state->hopcountinv * yshift, state->beta ) );
+  #endif
+  }
+#endif
+
+  return;
+}
+
+
+/* Releases the weight storage owned by 'state' and clears the owning pointer. linearx/lineary are left untouched. */
+static inline void imFreeGenericState( imGenericMatrixState *state )
+{
+  void *block = state->alloc;
+  state->alloc = 0;
+  free( block );
+}
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+/*
+ * Computes one destination byte of a 1-channel image: the weighted sum of the
+ * matrixsize x matrixsize source bytes starting at (pointx,pointy), with no
+ * gamma handling. The source position wraps to 0 on reaching state->width1
+ * horizontally or state->height vertically. The result is rounded and clamped
+ * to 0..255.
+ */
+static void imStaticKernel1Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int row, col, srcx, srcy;
+  float accum;
+  float *weights;
+  unsigned char *line;
+
+  accum = 0.0f;
+  weights = state->matrix;
+  srcy = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcy * state->bytesperline ) );
+    srcx = pointx;
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      accum += weights[col] * (float)line[ srcx ];
+      /* Horizontal wrap-around */
+      if( ++srcx >= state->width1 )
+        srcx = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    /* Vertical wrap-around */
+    if( ++srcy >= state->height )
+      srcy = 0;
+  }
+
+  accum = fminf( 255.0f, accum + 0.5f );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, accum ) );
+}
+
+/*
+ * Computes one destination pixel of a 2-channel image: per channel, the
+ * weighted sum of the matrixsize x matrixsize source pixels starting at
+ * (pointx,pointy), with no gamma handling. The source byte index wraps to 0
+ * on reaching state->width2, the row index on reaching state->height. Each
+ * channel is rounded and clamped to 0..255.
+ */
+static void imStaticKernel2Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int row, col, srcx, srcy;
+  float weight, accum0, accum1;
+  float *weights;
+  unsigned char *line, *texel;
+
+  accum0 = 0.0f;
+  accum1 = 0.0f;
+  weights = state->matrix;
+  srcy = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcy * state->bytesperline ) );
+    /* Byte index of the first pixel: 2 bytes per pixel */
+    srcx = pointx << 1;
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      weight = weights[col];
+      texel = &line[ srcx ];
+      accum0 += weight * (float)texel[0];
+      accum1 += weight * (float)texel[1];
+      /* Horizontal wrap-around */
+      srcx += 2;
+      if( srcx >= state->width2 )
+        srcx = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    /* Vertical wrap-around */
+    if( ++srcy >= state->height )
+      srcy = 0;
+  }
+
+  accum0 = fminf( 255.0f, accum0 + 0.5f );
+  accum1 = fminf( 255.0f, accum1 + 0.5f );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, accum0 ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, accum1 ) );
+}
+
+/*
+ * Computes one destination pixel of a 3-channel image: per channel, the
+ * weighted sum of the matrixsize x matrixsize source pixels starting at
+ * (pointx,pointy), with no gamma handling. The source byte index wraps to 0
+ * on reaching state->width3, the row index on reaching state->height. Each
+ * channel is rounded and clamped to 0..255.
+ */
+static void imStaticKernel3Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int row, col, srcx, srcy;
+  float weight, accum0, accum1, accum2;
+  float *weights;
+  unsigned char *line, *texel;
+
+  accum0 = 0.0f;
+  accum1 = 0.0f;
+  accum2 = 0.0f;
+  weights = state->matrix;
+  srcy = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcy * state->bytesperline ) );
+    /* Byte index of the first pixel: 3 bytes per pixel */
+    srcx = pointx + ( pointx << 1 );
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      weight = weights[col];
+      texel = &line[ srcx ];
+      accum0 += weight * (float)texel[0];
+      accum1 += weight * (float)texel[1];
+      accum2 += weight * (float)texel[2];
+      /* Horizontal wrap-around */
+      srcx += 3;
+      if( srcx >= state->width3 )
+        srcx = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    /* Vertical wrap-around */
+    if( ++srcy >= state->height )
+      srcy = 0;
+  }
+
+  accum0 = fminf( 255.0f, accum0 + 0.5f );
+  accum1 = fminf( 255.0f, accum1 + 0.5f );
+  accum2 = fminf( 255.0f, accum2 + 0.5f );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, accum0 ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, accum1 ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, accum2 ) );
+}
+
+/*
+ * Computes one destination pixel of a 4-channel image: per channel, the
+ * weighted sum of the matrixsize x matrixsize source pixels starting at
+ * (pointx,pointy), with no gamma handling. The source byte index wraps to 0
+ * on reaching state->width4, the row index on reaching state->height.
+ * With SSE2 the four channels of a pixel are accumulated in one register;
+ * otherwise four scalar sums are kept.
+ */
+static void imStaticKernel4Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float f, sum0, sum1, sum2, sum3;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+ sum3 = 0.0f;
+#endif
+ matrix = state->matrix;
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ /* Byte index of the first pixel: 4 bytes per pixel */
+ mapx = pointx << 2;
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ /* Load the 4 bytes of one pixel with a 32-bit load and convert them to four floats */
+ vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
+#else
+ f = matrix[x];
+ sum0 += f * (float)src[ mapx + 0 ];
+ sum1 += f * (float)src[ mapx + 1 ];
+ sum2 += f * (float)src[ mapx + 2 ];
+ sum3 += f * (float)src[ mapx + 3 ];
+#endif
+ /* Horizontal wrap-around */
+ mapx += 4;
+ if( mapx >= state->width4 )
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize );
+ /* Vertical wrap-around */
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ /* Convert to integers, then narrow 32 -> 16 -> 8 bits with saturation, and store the 4 result bytes at once */
+ _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+#else
+ /* Round by adding 0.5 before truncation, clamped to 0..255 */
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+ dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+
+#if CPU_SSE2_SUPPORT
+
+/*
+ * SSE2 variant of imStaticKernel4Linear() that consumes four weights and four
+ * source pixels per inner iteration. There is no horizontal wrap check, and
+ * the inner loop runs over the zero-padded row width in steps of 4, so bytes
+ * beyond matrixsize pixels are read (multiplied by zero weights).
+ * NOTE(review): presumably only called for samples whose whole footprint,
+ * padding included, lies inside the source row -- confirm against the caller.
+ */
+static void imStaticKernel4Linear_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+ int x, y, mapx, mapy;
+ __m128 vsum, vf, v0, v1, v2, v3;
+ __m128i vzero;
+ float *matrix;
+ unsigned char *src;
+
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+ matrix = state->matrix;
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ mapx = pointx << 2;
+ for( x = 0 ; x < state->matrixsize ; x += 4 )
+ {
+ /* Four consecutive weights; the aligned load relies on 16-byte aligned matrix rows */
+ vf = _mm_load_ps( &matrix[x] );
+ /* Four consecutive pixels, each expanded to four floats */
+ v0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 0 ] ) ), vzero ) );
+ v1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 4 ] ) ), vzero ) );
+ v2 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 8 ] ) ), vzero ) );
+ v3 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 12 ] ) ), vzero ) );
+ /* Broadcast weight k to all lanes and accumulate pixel k */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x00 ), v0 ) );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x55 ), v1 ) );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xaa ), v2 ) );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xff ), v3 ) );
+ mapx += 16;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize );
+ /* Vertical wrap-around is still handled */
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+ /* Convert to integers, then narrow 32 -> 16 -> 8 bits with saturation, and store the 4 result bytes at once */
+ _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+
+ return;
+}
+
+#endif
+
+
+////
+
+
+/*
+ * Computes one destination pixel of a 4-channel image with alpha-weighted
+ * colors: each source pixel contributes its first three channels scaled by
+ * ( filter weight * its alpha byte ), and the sum of those scaled weights
+ * becomes the output alpha. If that sum reaches state->minimumalphaf the
+ * colors are divided by it; otherwise the destination pixel is all zeros.
+ * The source byte index wraps to 0 on reaching state->width4, the row index
+ * on reaching state->height. No gamma handling.
+ */
+static void imStaticKernel4LinearAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int row, col, srcx, srcy;
+  float weight, norm, color0, color1, color2, alphasum;
+  float *weights;
+  unsigned char *line, *texel;
+
+  color0 = 0.0f;
+  color1 = 0.0f;
+  color2 = 0.0f;
+  alphasum = 0.0f;
+  weights = state->matrix;
+  srcy = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcy * state->bytesperline ) );
+    /* Byte index of the first pixel: 4 bytes per pixel */
+    srcx = pointx << 2;
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      texel = &line[ srcx ];
+      /* Filter weight scaled by the pixel's alpha byte */
+      weight = weights[col] * (float)texel[3];
+      color0 += weight * (float)texel[0];
+      color1 += weight * (float)texel[1];
+      color2 += weight * (float)texel[2];
+      alphasum += weight;
+      /* Horizontal wrap-around */
+      srcx += 4;
+      if( srcx >= state->width4 )
+        srcx = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    /* Vertical wrap-around */
+    if( ++srcy >= state->height )
+      srcy = 0;
+  }
+
+  if( alphasum >= state->minimumalphaf )
+  {
+    /* Undo the alpha weighting of the colors; alpha itself is stored as accumulated */
+    norm = 1.0f / alphasum;
+    dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( color0 * norm ) + 0.5f ) ) );
+    dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( color1 * norm ) + 0.5f ) ) );
+    dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( color2 * norm ) + 0.5f ) ) );
+    dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, alphasum + 0.5f ) ) );
+    return;
+  }
+
+  /* Too little coverage: emit a fully zero pixel */
+  dst[0] = 0;
+  dst[1] = 0;
+  dst[2] = 0;
+  dst[3] = 0;
+}
+
+
+#if CPU_SSE2_SUPPORT
+
+/*
+ * SSE2 variant of imStaticKernel4LinearAlphaNorm() that processes four source
+ * pixels per inner iteration. Each 16-byte load is rearranged so that the
+ * four values of one channel end up together, giving one register per channel
+ * with one lane per pixel. There is no horizontal wrap check, and the inner
+ * loop runs over the zero-padded row width in steps of 4.
+ * NOTE(review): presumably only called for samples whose whole footprint,
+ * padding included, lies inside the source row -- confirm against the caller.
+ * The final store writes dst through a uint32_t pointer.
+ */
+static void imStaticKernel4LinearAlphaNorm_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+ int x, y, mapx, mapy;
+ uint32_t pixel;
+ float *matrix;
+ unsigned char *src;
+ __m128 vsum0, vsum1, vsum2, vsum3;
+ __m128 vf, valpha, vr, vg, vb, va, vsrcf;
+ __m128i vsrc, vshufmask;
+ __m128i vzero;
+
+ #if CPU_SSSE3_SUPPORT
+ /* Byte shuffle that gathers bytes 0,4,8,12 / 1,5,9,13 / 2,6,10,14 / 3,7,11,15 */
+ vshufmask = _mm_setr_epi8( 0x00,0x04,0x08,0x0c, 0x01,0x05,0x09,0x0d, 0x02,0x06,0x0a,0x0e, 0x03,0x07,0x0b,0x0f );
+ #endif
+ vsum0 = _mm_setzero_ps();
+ vsum1 = _mm_setzero_ps();
+ vsum2 = _mm_setzero_ps();
+ vsum3 = _mm_setzero_ps();
+ vzero = _mm_castps_si128( _mm_setzero_ps() );
+
+ matrix = state->matrix;
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ mapx = pointx << 2;
+ for( x = 0 ; x < state->matrixsize ; x += 4 )
+ {
+ /* Four consecutive weights, one per pixel; the aligned load relies on 16-byte aligned matrix rows */
+ vf = _mm_load_ps( &matrix[x] );
+ /* Load 16 bytes and unpack as RRRR,GGGG,BBBB,AAAA in one SSE register */
+ vsrc = _mm_loadu_si128( (void *)&src[ mapx ] );
+ #if CPU_SSSE3_SUPPORT
+ vsrc = _mm_shuffle_epi8( vsrc, vshufmask );
+ #else
+ /* SSE2 fallback for the byte shuffle; vshufmask is reused as a scratch register here */
+ vshufmask = _mm_shuffle_epi32( vsrc, 0x39 );
+ vsrc = _mm_unpacklo_epi16( _mm_unpacklo_epi8( vsrc, vshufmask ), _mm_unpackhi_epi8( vsrc, vshufmask ) );
+ #endif
+ /* Break that into 4 SSE registers as floats: vR,vG,vB,vA */
+ vsrcf = _mm_castsi128_ps( vsrc );
+ vr = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( vsrcf ), vzero ) );
+ #if CPU_SSE3_SUPPORT
+ vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehdup_ps( vsrcf ) ), vzero ) );
+ #else
+ vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0x55 ) ), vzero ) );
+ #endif
+ vb = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehl_ps( vsrcf, vsrcf ) ), vzero ) );
+ va = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0xff ) ), vzero ) );
+ /* Per-pixel filter weight scaled by that pixel's alpha */
+ valpha = _mm_mul_ps( va, vf );
+ vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( vr, valpha ) );
+ vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( vg, valpha ) );
+ vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( vb, valpha ) );
+ vsum3 = _mm_add_ps( vsum3, valpha );
+ mapx += 16;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize );
+ /* Vertical wrap-around is still handled */
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+ /* Horizontal reduction: afterwards vsum0 holds the four totals of vsum0..vsum3, one per lane (R,G,B,A) */
+ #if CPU_SSE3_SUPPORT
+ vsum0 = _mm_hadd_ps( vsum0, vsum1 );
+ vsum2 = _mm_hadd_ps( vsum2, vsum3 );
+ vsum0 = _mm_hadd_ps( vsum0, vsum2 );
+ #else
+ vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum2 ), _mm_unpackhi_ps( vsum0, vsum2 ) );
+ vsum1 = _mm_add_ps( _mm_unpacklo_ps( vsum1, vsum3 ), _mm_unpackhi_ps( vsum1, vsum3 ) );
+ vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum1 ), _mm_unpackhi_ps( vsum0, vsum1 ) );
+ #endif
+
+ /* Broadcast the accumulated alpha (lane 3) to all lanes */
+ valpha = _mm_shuffle_ps( vsum0, vsum0, 0xff );
+ /* Stays zero when the accumulated alpha is below the threshold */
+ pixel = 0;
+ if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
+ {
+ __m128i vpixel;
+ /* Divide by alpha using the approximate reciprocal, so results may differ slightly from the scalar kernel */
+ vsum0 = _mm_mul_ps( vsum0, _mm_rcp_ps( valpha ) );
+ /* Presumably restores the undivided alpha in the alpha lane -- simd4fAlphaMask is defined outside this file */
+ vsum0 = CPU_BLENDV_PS( vsum0, valpha, *(__m128 *)simd4fAlphaMask );
+ vpixel = _mm_cvtps_epi32( vsum0 );
+ /* Narrow 32 -> 16 -> 8 bits with saturation */
+ vpixel = _mm_packs_epi32( vpixel, vpixel );
+ vpixel = _mm_packus_epi16( vpixel, vpixel );
+ pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
+ }
+ *(uint32_t *)dst = pixel;
+
+ return;
+}
+
+#endif
+
+
+////
+
+
+/*
+ * Computes one destination byte of a 1-channel image with gamma-correct
+ * filtering: every source byte is converted from sRGB to linear before being
+ * weighted, and the accumulated sum is converted back to sRGB.
+ * The source byte index wraps to 0 on reaching state->width1, the row index
+ * on reaching state->height.
+ */
+static void imStaticKernel1sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float f, sum0;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+#endif
+ matrix = state->matrix;
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ mapx = pointx;
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ /* Only lane 0 carries data: the sample and the weight are both placed there with the other lanes zeroed */
+ vsrc = _mm_set_ss( (float)src[ mapx + 0 ] );
+ vsrc = srgb2linear3( vsrc );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set_ss( matrix[x] ), vsrc ) );
+#else
+ f = matrix[x];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+#endif
+ /* Horizontal wrap-around */
+ mapx++;
+ if( mapx >= state->width1 )
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize );
+ /* Vertical wrap-around */
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ /* Back to sRGB, narrowed with saturation; the assignment keeps only the lowest byte */
+ dst[0] = _mm_cvtsi128_si32( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( linear2srgb3( vsum ) ), vzero ), vzero ) );
+#else
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+static void imStaticKernel2sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* Writes dst[0..1]: per-channel state->matrix weighted sums of 2-byte pixels starting at (pointx,pointy); both channels go through srgb2linear*() before weighting and linear2srgb*() afterwards. */
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT /* SSE2 build: both channel sums share one vector (lanes 0 and 1) */
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else /* portable build: one float accumulator per channel */
+ float f, sum0, sum1;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+#endif
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx << 1; /* byte offset of pixel pointx at 2 bytes per pixel */
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ vsrc = _mm_set_ps( 0.0f, 0.0f, (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] ); /* lane 0 = byte 0, lane 1 = byte 1, lanes 2-3 = 0 */
+ vsrc = srgb2linear3( vsrc ); /* helper defined elsewhere; presumably sRGB -> linear on lanes 0-2, TODO confirm */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
+#else
+ f = matrix[x];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+#endif
+ mapx += 2;
+ if( mapx >= state->width2 ) /* past the last pixel of the row: continue from column 0 */
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* past the last row: continue from row 0 */
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ union /* scratch word used to pull single bytes out of the packed vector result */
+ {
+ char c[4];
+ uint32_t i;
+ } u;
+ vsum = linear2srgb3( vsum );
+ _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) ); /* float -> int32 -> saturated int16 -> saturated uint8, low 32 bits stored into u */
+ dst[0] = u.c[0]; /* only the first two packed bytes are copied out */
+ dst[1] = u.c[1];
+#else
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) ); /* +0.5f then truncation rounds to nearest; clamped to 0..255 */
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+static void imStaticKernel3sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* Writes dst[0..2]: per-channel state->matrix weighted sums of 3-byte pixels starting at (pointx,pointy); all three channels go through srgb2linear*() before weighting and linear2srgb*() afterwards. */
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT /* SSE2 build: the three channel sums share one vector (lanes 0-2) */
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else /* portable build: one float accumulator per channel */
+ float f, sum0, sum1, sum2;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+#endif
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx + ( pointx << 1 ); /* pointx * 3: byte offset of pixel pointx */
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ vsrc = _mm_set_ps( 0.0f, (float)src[ mapx + 2 ], (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] ); /* lanes 0-2 = the three bytes, lane 3 = 0 */
+ vsrc = srgb2linear3( vsrc ); /* helper defined elsewhere; presumably sRGB -> linear on lanes 0-2, TODO confirm */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
+#else
+ f = matrix[x];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
+#endif
+ mapx += 3;
+ if( mapx >= state->width3 ) /* past the last pixel of the row: continue from column 0 */
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* past the last row: continue from row 0 */
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ union /* scratch word: the packed result is 4 bytes wide but only 3 may be written to dst */
+ {
+ char c[4];
+ uint32_t i;
+ } u;
+ vsum = linear2srgb3( vsum );
+ _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) ); /* float -> int32 -> saturated int16 -> saturated uint8, low 32 bits stored into u */
+ dst[0] = u.c[0];
+ dst[1] = u.c[1];
+ dst[2] = u.c[2];
+#else
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) ); /* +0.5f then truncation rounds to nearest; clamped to 0..255 */
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+static void imStaticKernel4sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* Writes dst[0..3]: per-channel state->matrix weighted sums of 4-byte pixels starting at (pointx,pointy); channels 0-2 go through the sRGB helpers, channel 3 is averaged as-is in the scalar path. */
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT /* SSE2 build: all four channel sums share one vector */
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else /* portable build: one float accumulator per channel */
+ float f, sum0, sum1, sum2, sum3;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+ sum3 = 0.0f;
+#endif
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx << 2; /* byte offset of pixel pointx at 4 bytes per pixel */
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) ); /* load the 4 pixel bytes as one 32-bit word, then (presumably) widen each byte to an int32 lane - macro defined elsewhere */
+ vsrc = srgb2linear3( vsrc ); /* presumably converts lanes 0-2 and leaves lane 3 alone, matching the scalar path - TODO confirm */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
+#else
+ f = matrix[x];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
+ sum3 += f * (float)src[ mapx + 3 ]; /* channel 3 is not passed through srgb2linear() */
+#endif
+ mapx += 4;
+ if( mapx >= state->width4 ) /* past the last pixel of the row: continue from column 0 */
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* past the last row: continue from row 0 */
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ vsum = linear2srgb3( vsum );
+ _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) ); /* float -> int32 -> saturated int16 -> saturated uint8; all 4 bytes of dst are written with one 32-bit store */
+#else
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) ); /* +0.5f then truncation rounds to nearest; clamped to 0..255 */
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
+ dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) ); /* channel 3 skips linear2srgb() as well */
+#endif
+
+ return;
+}
+
+
+#if CPU_SSE2_SUPPORT
+
+static void imStaticKernel3sRGB_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* SSE2 variant of the 3-byte sRGB kernel that consumes 4 pixels (12 bytes) and 4 weights per inner step and never wraps columns; writes dst[0..2]. */
+ int x, y, mapx, mapy;
+ __m128 vsum0, vsum1, vsum2, vsrc0, vsrc1, vsrc2, vf;
+ __m128i vzero;
+ float *matrix;
+ unsigned char *src;
+ union /* scratch word: the packed result is 4 bytes wide but only 3 may be written to dst */
+ {
+ char c[4];
+ uint32_t i;
+ } u;
+
+ vsum0 = _mm_setzero_ps();
+ vsum1 = _mm_setzero_ps();
+ vsum2 = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx + ( pointx << 1 ); /* pointx * 3: byte offset of pixel pointx */
+ for( x = 0 ; x < state->matrixsize ; x += 4 ) /* 4 weights per step; presumably matrixsize is a multiple of 4 here - TODO confirm with the matrix builder */
+ {
+ vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) ); /* bytes 0-3: pixel 0 channels 0,1,2 + pixel 1 channel 0 */
+ vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) ); /* bytes 4-7: pixel 1 channels 1,2 + pixel 2 channels 0,1 */
+ vsrc2 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+8 ] ) ), vzero ) ); /* bytes 8-11: pixel 2 channel 2 + pixel 3 channels 0,1,2 */
+ vsrc0 = srgb2linear4( vsrc0 ); /* helper defined elsewhere; presumably converts all four lanes, TODO confirm */
+ vsrc1 = srgb2linear4( vsrc1 );
+ vsrc2 = srgb2linear4( vsrc2 );
+ vf = _mm_load_ps( &matrix[x] ); /* aligned load of four weights w0..w3: &matrix[x] must be 16-byte aligned */
+ vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x40 ), vsrc0 ) ); /* weights (w0,w0,w0,w1) */
+ vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xA5 ), vsrc1 ) ); /* weights (w1,w1,w2,w2) */
+ vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xFE ), vsrc2 ) ); /* weights (w2,w3,w3,w3) */
+ mapx += 12; /* no column wrap-around in this kernel; presumably only used where the window stays inside the row - verify against caller */
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* rows still wrap: continue from row 0 */
+ mapy = 0;
+ }
+
+ #if CPU_SSSE3_SUPPORT /* fold the 12 partial sums into lanes 0-2 (one total per channel) using byte-wise alignr */
+ vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum1 ), _mm_castps_si128( vsum0 ), 12 ) ) ); /* adds (vsum0[3], vsum1[0], vsum1[1], vsum1[2]) */
+ vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum2 ), _mm_castps_si128( vsum1 ), 8 ) ) ); /* adds (vsum1[2], vsum1[3], vsum2[0], vsum2[1]) */
+ vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum2 ), _mm_castps_si128( vsum2 ), 4 ) ) ); /* adds (vsum2[1], vsum2[2], vsum2[3], vsum2[0]) */
+ #else /* same fold with SSE shuffles; lane 3 ends up holding a value that is not used */
+ vf = _mm_shuffle_ps( vsum0, vsum1, 0x4f ); /* (vsum0[3], vsum0[3], vsum1[0], vsum1[1]) */
+ vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vf, vf, 0x38 ) ); /* adds (vsum0[3], vsum1[0], vsum1[1], -) */
+ vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vsum1, vsum2, 0x0E ) ); /* adds (vsum1[2], vsum1[3], vsum2[0], -) */
+ vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vsum2, vsum2, 0x39 ) ); /* adds (vsum2[1], vsum2[2], vsum2[3], -) */
+ #endif
+
+ vsum0 = linear2srgb3( vsum0 );
+ _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) ); /* float -> int32 -> saturated int16 -> saturated uint8, low 32 bits stored into u */
+ dst[0] = u.c[0];
+ dst[1] = u.c[1];
+ dst[2] = u.c[2];
+
+ return;
+}
+
+static void imStaticKernel4sRGB_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* SSE2 variant of the 4-byte sRGB kernel that consumes 2 pixels and 2 weights per inner step and never wraps columns; writes dst[0..3] with one 32-bit store. */
+ int x, y, mapx, mapy;
+ __m128 vsum, vsrc0, vsrc1;
+ __m128i vzero;
+ float *matrix;
+ unsigned char *src;
+
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx << 2; /* byte offset of pixel pointx at 4 bytes per pixel */
+ for( x = 0 ; x < state->matrixsize ; x += 2 ) /* 2 weights per step; presumably matrixsize is even here - TODO confirm with the matrix builder */
+ {
+ vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) ); /* first pixel of the pair */
+ vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) ); /* second pixel of the pair */
+ vsrc0 = srgb2linear3( vsrc0 ); /* helper defined elsewhere; presumably converts lanes 0-2 only, TODO confirm */
+ vsrc1 = srgb2linear3( vsrc1 );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x+0] ), vsrc0 ) );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x+1] ), vsrc1 ) );
+ mapx += 8; /* no column wrap-around in this kernel; presumably only used where the window stays inside the row - verify against caller */
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* rows still wrap: continue from row 0 */
+ mapy = 0;
+ }
+
+ vsum = linear2srgb3( vsum );
+ _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) ); /* float -> int32 -> saturated int16 -> saturated uint8; all 4 bytes of dst written at once */
+
+ return;
+}
+
+#endif
+
+
+////
+
+
+static void imStaticKernel4sRGBAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* 4-byte sRGB kernel with alpha normalisation: channels 0-2 are weighted by matrix weight * channel 3, divided by the summed weight at the end; the pixel is written as all zeros when that sum is below state->minimumalphaf. */
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT /* SSE2 build: all four sums share one vector, lane 3 holds the summed alpha weight */
+ __m128 vsum, vsrc, valpha;
+ __m128i vzero;
+ uint32_t pixel;
+#else /* portable build: sum3 holds the summed alpha weight */
+ float f, sum0, sum1, sum2, sum3;
+#endif
+ float *matrix;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+ sum3 = 0.0f;
+#endif
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx << 2; /* byte offset of pixel pointx at 4 bytes per pixel */
+ for( x = 0 ; x < state->matrixsize ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) ); /* the 4 pixel bytes as floats (macro defined elsewhere - presumably byte -> int32 widening) */
+ valpha = _mm_shuffle_ps( vsrc, _mm_set_ss( 1.0f ), 0x0f ); /* (a, a, 1, 1) where a = channel 3 */
+ vsrc = srgb2linear3( vsrc ); /* presumably converts lanes 0-2 and keeps lane 3 - TODO confirm */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha, valpha, 0xC0 ), _mm_set1_ps( matrix[x] ) ), vsrc ) ); /* per-lane factor (a, a, a, 1) * weight, so lane 3 accumulates weight * a */
+#else
+ f = matrix[x] * (float)src[ mapx + 3 ]; /* weight pre-multiplied by the sample's channel 3 */
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
+ sum3 += f;
+#endif
+ mapx += 4;
+ if( mapx >= state->width4 ) /* past the last pixel of the row: continue from column 0 */
+ mapx = 0;
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* past the last row: continue from row 0 */
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ valpha = _mm_shuffle_ps( vsum, vsum, 0xff ); /* broadcast lane 3 (summed alpha weight) */
+ pixel = 0; /* result when the summed alpha weight is below the threshold */
+ if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
+ {
+ __m128i vpixel;
+ vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha ) ); /* divide by the summed weight using the approximate reciprocal */
+ vsum = CPU_BLENDV_PS( vsum, valpha, *(__m128 *)simd4fAlphaMask ); /* presumably puts the un-normalised sum back into the alpha lane - macro and mask defined elsewhere, TODO confirm */
+ vsum = linear2srgb3( vsum );
+ vpixel = _mm_cvtps_epi32( vsum );
+ vpixel = _mm_packs_epi32( vpixel, vpixel ); /* saturate to int16 */
+ vpixel = _mm_packus_epi16( vpixel, vpixel ); /* saturate to uint8 */
+ pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
+ }
+ *(uint32_t *)dst = pixel; /* 32-bit store: presumably dst is suitably aligned for uint32_t - verify against caller */
+#else
+ if( sum3 >= state->minimumalphaf )
+ {
+ f = 1.0f / sum3; /* normalise the colour sums by the summed alpha weight */
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * f ) + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * f ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * f ) + 0.5f ) ) );
+ dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) ); /* channel 3 is the un-normalised weighted sum */
+ }
+ else
+ {
+ dst[0] = 0;
+ dst[1] = 0;
+ dst[2] = 0;
+ dst[3] = 0;
+ }
+#endif
+
+ return;
+}
+
+
+#if CPU_SSE2_SUPPORT
+
+static void imStaticKernel4sRGBAlphaNorm_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{ /* SSE2 variant of the alpha-normalised 4-byte sRGB kernel that consumes 2 pixels and 2 weights per inner step and never wraps columns; writes zeros when the summed alpha weight is below state->minimumalphaf. */
+ int x, y, mapx, mapy;
+ __m128 vsum, vsrc0, vsrc1, valpha0, valpha1;
+ __m128i vzero;
+ uint32_t pixel;
+ float *matrix;
+ unsigned char *src;
+
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+ matrix = state->matrix; /* first row of filter weights */
+ mapy = pointy;
+ for( y = 0 ; y < state->matrixsize ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) ); /* start of source row mapy */
+ mapx = pointx << 2; /* byte offset of pixel pointx at 4 bytes per pixel */
+ for( x = 0 ; x < state->matrixsize ; x += 2 ) /* 2 weights per step; presumably matrixsize is even here - TODO confirm with the matrix builder */
+ {
+ vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) ); /* first pixel of the pair */
+ vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) ); /* second pixel of the pair */
+ valpha0 = _mm_shuffle_ps( vsrc0, _mm_set_ss( 1.0f ), 0x0f ); /* (a, a, 1, 1) where a = channel 3 of the first pixel */
+ valpha1 = _mm_shuffle_ps( vsrc1, _mm_set_ss( 1.0f ), 0x0f ); /* same for the second pixel */
+ vsrc0 = srgb2linear3( vsrc0 ); /* presumably converts lanes 0-2 and keeps lane 3 - TODO confirm */
+ vsrc1 = srgb2linear3( vsrc1 );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha0, valpha0, 0xC0 ), _mm_set1_ps( matrix[x+0] ) ), vsrc0 ) ); /* per-lane factor (a, a, a, 1) * weight, so lane 3 accumulates weight * a */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha1, valpha1, 0xC0 ), _mm_set1_ps( matrix[x+1] ) ), vsrc1 ) );
+ mapx += 8; /* no column wrap-around in this kernel; presumably only used where the window stays inside the row - verify against caller */
+ }
+ matrix = ADDRESS( matrix, state->matrixrowsize ); /* next weight row; matrixrowsize is presumably a byte stride, TODO confirm */
+ mapy++;
+ if( mapy >= state->height ) /* rows still wrap: continue from row 0 */
+ mapy = 0;
+ }
+
+ valpha0 = _mm_shuffle_ps( vsum, vsum, 0xff ); /* broadcast lane 3 (summed alpha weight) */
+ pixel = 0; /* result when the summed alpha weight is below the threshold */
+ if( _mm_comige_ss( valpha0, _mm_load_ss( &state->minimumalphaf ) ) )
+ {
+ __m128i vpixel;
+ vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha0 ) ); /* divide by the summed weight using the approximate reciprocal */
+ vsum = CPU_BLENDV_PS( vsum, valpha0, *(__m128 *)simd4fAlphaMask ); /* presumably puts the un-normalised sum back into the alpha lane - macro and mask defined elsewhere, TODO confirm */
+ vsum = linear2srgb3( vsum );
+ vpixel = _mm_cvtps_epi32( vsum );
+ vpixel = _mm_packs_epi32( vpixel, vpixel ); /* saturate to int16 */
+ vpixel = _mm_packus_epi16( vpixel, vpixel ); /* saturate to uint8 */
+ pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
+ }
+ *(uint32_t *)dst = pixel; /* 32-bit store: presumably dst is suitably aligned for uint32_t - verify against caller */
+
+ return;
+}
+
+#endif
+
+////
+
+
+/* Reduces one 3-byte normal-map texel: the matrix-weighted byte averages are re-centred on
+ * 127.5, channels 0 and 1 are scaled by state->amplifynormal and the vector is renormalised. */
+static void imStaticKernel3Normal( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int x, y, mapx, mapy;
+  float f, sum0, sum1, sum2, suminv;
+  float *matrix;
+  unsigned char *src;
+
+  sum0 = sum1 = sum2 = 0.0f;
+  matrix = state->matrix;
+  mapy = pointy;
+  for( y = 0 ; y < state->matrixsize ; y++ )
+  {
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    mapx = pointx + ( pointx << 1 );
+    for( x = 0 ; x < state->matrixsize ; x++ )
+    {
+      f = matrix[x];
+      sum0 += f * (float)src[ mapx + 0 ];
+      sum1 += f * (float)src[ mapx + 1 ];
+      sum2 += f * (float)src[ mapx + 2 ];
+      mapx += 3;
+      if( mapx >= state->width3 )
+        mapx = 0;
+    }
+    matrix = ADDRESS( matrix, state->matrixrowsize );
+    mapy++;
+    if( mapy >= state->height )
+      mapy = 0;
+  }
+
+  sum0 -= 0.5f*255.0f;
+  sum1 -= 0.5f*255.0f;
+  sum2 -= 0.5f*255.0f;
+  sum0 *= state->amplifynormal;
+  sum1 *= state->amplifynormal;
+  /* Floor the length (same 0.0625 floor as the *Sustain kernels): a zero-length vector would
+   * otherwise give inf, then 0*inf = NaN, which the clamp below turns into 255 on all channels. */
+  suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
+  sum0 = (0.5f*255.0f) + ( sum0 * suminv );
+  sum1 = (0.5f*255.0f) + ( sum1 * suminv );
+  sum2 = (0.5f*255.0f) + ( sum2 * suminv );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+}
+
+/* Reduces one 4-byte normal-map texel: channels 0-2 are matrix-weighted averages re-centred
+ * on 127.5, with channels 0 and 1 scaled by state->amplifynormal before the 3-vector is
+ * renormalised; channel 3 is the plain weighted average.  Has an SSE2 and a scalar path. */
+static void imStaticKernel4Normal( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+  __m128 vsum, vsrc;
+  __m128i vzero;
+#else
+  float f;
+#endif
+  float sum0, sum1, sum2, sum3, suminv;
+  float *matrix;
+  unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+  vsum = _mm_setzero_ps();
+  vzero = _mm_setzero_si128();
+#else
+  sum0 = sum1 = sum2 = sum3 = 0.0f;
+#endif
+  matrix = state->matrix;
+  mapy = pointy;
+  for( y = 0 ; y < state->matrixsize ; y++ )
+  {
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    mapx = pointx << 2;
+    for( x = 0 ; x < state->matrixsize ; x++ )
+    {
+#if CPU_SSE2_SUPPORT
+      vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
+      vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
+#else
+      f = matrix[x];
+      sum0 += f * (float)src[ mapx + 0 ];
+      sum1 += f * (float)src[ mapx + 1 ];
+      sum2 += f * (float)src[ mapx + 2 ];
+      sum3 += f * (float)src[ mapx + 3 ];
+#endif
+      mapx += 4;
+      if( mapx >= state->width4 )
+        mapx = 0;
+    }
+    matrix = ADDRESS( matrix, state->matrixrowsize );
+    mapy++;
+    if( mapy >= state->height )
+      mapy = 0;
+  }
+
+#if CPU_SSE2_SUPPORT
+  vsum = _mm_sub_ps( vsum, _mm_set_ps( 0.0f, 0.5f*255.0f, 0.5f*255.0f, 0.5f*255.0f ) );
+  sum0 = _mm_cvtss_f32( vsum );
+  #if CPU_SSE3_SUPPORT
+  sum1 = _mm_cvtss_f32( _mm_movehdup_ps( vsum ) );
+  #else
+  sum1 = _mm_cvtss_f32( _mm_shuffle_ps( vsum, vsum, 0x55 ) );
+  #endif
+  sum2 = _mm_cvtss_f32( _mm_movehl_ps( vsum, vsum ) );
+  sum3 = _mm_cvtss_f32( _mm_shuffle_ps( vsum, vsum, 0xff ) );
+#else
+  sum0 -= 0.5f*255.0f;
+  sum1 -= 0.5f*255.0f;
+  sum2 -= 0.5f*255.0f;
+#endif
+  sum0 *= state->amplifynormal;
+  sum1 *= state->amplifynormal;
+  /* Floor the length (same 0.0625 floor as the *Sustain kernels): a zero-length vector would
+   * otherwise give inf, then 0*inf = NaN, which the clamp below turns into 255 on channels 0-2. */
+  suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
+  sum0 = (0.5f*255.0f) + ( sum0 * suminv );
+  sum1 = (0.5f*255.0f) + ( sum1 * suminv );
+  sum2 = (0.5f*255.0f) + ( sum2 * suminv );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+}
+
+
+////
+
+
+/* Reduces one 3-byte normal-map texel while trying to sustain the tilt (XY strength) of
+ * the source normals: if the averaged normal ends up flatter than the summed per-sample
+ * tilt ratio times state->normalsustainfactor, its XY part is boosted by at most 8x. */
+static void imStaticKernel3NormalSustain( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int col, row, byteofs, srcrow;
+  float weight, nx, ny, nz, tilt, accx, accy, accz, tiltsum, scale;
+  float *weights;
+  unsigned char *line;
+
+  accx = accy = accz = 0.0f;
+  tiltsum = 0.0f;
+  weights = state->matrix;
+  srcrow = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcrow * state->bytesperline ) );
+    byteofs = pointx + ( pointx << 1 );
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      /* Bytes are re-centred on 127.5 so each weighted component is signed. */
+      weight = weights[col];
+      nx = weight * ( (float)line[ byteofs + 0 ] - 127.5f );
+      ny = weight * ( (float)line[ byteofs + 1 ] - 127.5f );
+      nz = weight * ( (float)line[ byteofs + 2 ] - 127.5f );
+      accx += nx;
+      accy += ny;
+      accz += nz;
+      tilt = ( nx * nx ) + ( ny * ny );
+      if( tilt != 0.0f )
+        tiltsum += sqrtf( tilt ) / sqrtf( tilt + ( nz * nz ) );
+      byteofs += 3; /* next pixel; restart at column 0 past the row end */
+      if( byteofs >= state->width3 )
+        byteofs = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    srcrow = ( ( srcrow + 1 ) >= state->height ) ? 0 : ( srcrow + 1 );
+  }
+
+  accx *= state->amplifynormal;
+  accy *= state->amplifynormal;
+  /* Rescale to length 127.5; the 0.0625 floor keeps the division finite. */
+  scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+  accx *= scale;
+  accy *= scale;
+  accz *= scale;
+  tilt = sqrtf( ( accx * accx ) + ( accy * accy ) );
+  tiltsum *= state->normalsustainfactor;
+  if( tilt < tiltsum )
+  {
+    weight = fminf( tiltsum / tilt, 8.0f );
+    accx *= weight;
+    accy *= weight;
+    scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+    accx *= scale;
+    accy *= scale;
+    accz *= scale;
+  }
+  accx += (0.5f*255.0f);
+  accy += (0.5f*255.0f);
+  accz += (0.5f*255.0f);
+  /* Round to nearest (+0.5f then truncation) and clamp into 0..255. */
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accy + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accz + 0.5f ) ) );
+}
+
+/* Reduces one 4-byte normal-map texel while trying to sustain the tilt (XY strength) of
+ * the source normals: if the averaged normal ends up flatter than the summed per-sample
+ * tilt ratio times state->normalsustainfactor, its XY part is boosted by at most 8x. */
+static void imStaticKernel4NormalSustain( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int col, row, byteofs, srcrow;
+  float weight, nx, ny, nz, na, tilt, accx, accy, accz, acca, tiltsum, scale;
+  float *weights;
+  unsigned char *line;
+
+  accx = accy = accz = acca = 0.0f;
+  tiltsum = 0.0f;
+  weights = state->matrix;
+  srcrow = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcrow * state->bytesperline ) );
+    byteofs = pointx << 2;
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      /* Bytes 0-2 are re-centred on 127.5 so each weighted component is signed. */
+      weight = weights[col];
+      nx = weight * ( (float)line[ byteofs + 0 ] - 127.5f );
+      ny = weight * ( (float)line[ byteofs + 1 ] - 127.5f );
+      nz = weight * ( (float)line[ byteofs + 2 ] - 127.5f );
+      /* Byte 3 is only averaged; it takes no part in the normal maths. */
+      na = weight * (float)line[ byteofs + 3 ];
+      accx += nx;
+      accy += ny;
+      accz += nz;
+      acca += na;
+      tilt = ( nx * nx ) + ( ny * ny );
+      if( tilt != 0.0f )
+        tiltsum += sqrtf( tilt ) / sqrtf( tilt + ( nz * nz ) );
+      byteofs += 4; /* next pixel; restart at column 0 past the row end */
+      if( byteofs >= state->width4 )
+        byteofs = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    srcrow = ( ( srcrow + 1 ) >= state->height ) ? 0 : ( srcrow + 1 );
+  }
+
+  accx *= state->amplifynormal;
+  accy *= state->amplifynormal;
+  /* Rescale to length 127.5; the 0.0625 floor keeps the division finite. */
+  scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+  accx *= scale;
+  accy *= scale;
+  accz *= scale;
+  tilt = sqrtf( ( accx * accx ) + ( accy * accy ) );
+  tiltsum *= state->normalsustainfactor;
+  if( tilt < tiltsum )
+  {
+    weight = fminf( tiltsum / tilt, 8.0f );
+    accx *= weight;
+    accy *= weight;
+    scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+    accx *= scale;
+    accy *= scale;
+    accz *= scale;
+  }
+  accx += (0.5f*255.0f);
+  accy += (0.5f*255.0f);
+  accz += (0.5f*255.0f);
+  /* Round to nearest (+0.5f then truncation) and clamp into 0..255. */
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accy + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accz + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acca + 0.5f ) ) );
+}
+
+
+////
+
+
+/* Alpha-normalised, tilt-sustaining reduction of one 4-byte normal-map texel.  Every
+ * sample is weighted by matrix weight * byte 3, the summed components are divided by the
+ * summed weight, and the result then follows the same amplify / renormalise / boost
+ * (at most 8x) sequence as the other *Sustain kernels.  When the summed weight is below
+ * state->minimumalphaf all four output bytes are written as 0. */
+static void imStaticKernel4NormalSustainAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int col, row, byteofs, srcrow;
+  float weight, nx, ny, nz, tilt, accx, accy, accz, acca, tiltsum, scale;
+  float *weights;
+  unsigned char *line;
+
+  accx = accy = accz = acca = 0.0f;
+  tiltsum = 0.0f;
+  weights = state->matrix;
+  srcrow = pointy;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( srcrow * state->bytesperline ) );
+    byteofs = pointx << 2;
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      /* The filter weight is pre-multiplied by the sample's byte 3. */
+      weight = weights[col] * (float)line[ byteofs + 3 ];
+      nx = weight * ( (float)line[ byteofs + 0 ] - 127.5f );
+      ny = weight * ( (float)line[ byteofs + 1 ] - 127.5f );
+      nz = weight * ( (float)line[ byteofs + 2 ] - 127.5f );
+      accx += nx;
+      accy += ny;
+      accz += nz;
+      acca += weight;
+      tilt = ( nx * nx ) + ( ny * ny );
+      if( tilt != 0.0f )
+        tiltsum += sqrtf( tilt ) / sqrtf( tilt + ( nz * nz ) );
+      byteofs += 4; /* next pixel; restart at column 0 past the row end */
+      if( byteofs >= state->width4 )
+        byteofs = 0;
+    }
+    weights = ADDRESS( weights, state->matrixrowsize );
+    srcrow = ( ( srcrow + 1 ) >= state->height ) ? 0 : ( srcrow + 1 );
+  }
+
+  /* Too little accumulated alpha weight: emit an all-zero pixel. */
+  if( !( acca >= state->minimumalphaf ) )
+  {
+    dst[0] = dst[1] = dst[2] = dst[3] = 0;
+    return;
+  }
+
+  /* Undo the alpha pre-multiplication of the summed components. */
+  weight = 1.0f / acca;
+  accx *= weight;
+  accy *= weight;
+  accz *= weight;
+  accx *= state->amplifynormal;
+  accy *= state->amplifynormal;
+  /* Rescale to length 127.5; the 0.0625 floor keeps the division finite. */
+  scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+  accx *= scale;
+  accy *= scale;
+  accz *= scale;
+  tilt = sqrtf( ( accx * accx ) + ( accy * accy ) );
+  tiltsum *= state->normalsustainfactor;
+  /* Boost XY when the averaged normal is flatter than the sustained target. */
+  if( tilt < tiltsum )
+  {
+    weight = fminf( tiltsum / tilt, 8.0f );
+    accx *= weight;
+    accy *= weight;
+    scale = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( accx * accx ) + ( accy * accy ) + ( accz * accz ) ) );
+    accx *= scale;
+    accy *= scale;
+    accz *= scale;
+  }
+  /* Move the signed components back into the unsigned byte encoding. */
+  accx += (0.5f*255.0f);
+  accy += (0.5f*255.0f);
+  accz += (0.5f*255.0f);
+  /* Round to nearest and clamp; byte 3 is the un-normalised weighted sum. */
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accy + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accz + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acca + 0.5f ) ) );
+}
+
+
+////
+
+
+/* Water-map reduction of one 3-byte pixel of a power-of-two image (coordinates wrap via bit
+ * masks): channels 0/1 are treated as a signed 2D vector, channel 2 is dithered to 0 or 1. */
+static void imStaticKernelPoT3Water( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int x, y, mapx, mapy, heightmask, widthmask;
+  int minx, maxx, miny, maxy;
+  float f, sum0, sum1, sum2, suminv;
+  float *matrix;
+  unsigned char *src;
+
+  minx = pointx;
+  maxx = minx + state->matrixsize;
+  miny = pointy;
+  maxy = miny + state->matrixsize;
+  heightmask = state->height - 1;
+  widthmask = state->width1 - 1;
+
+  sum0 = sum1 = sum2 = 0.0f;
+  matrix = state->matrix;
+  for( y = miny ; y < maxy ; y++ )
+  {
+    mapy = y & heightmask;
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    for( x = minx ; x < maxx ; x++, matrix++ )
+    {
+      mapx = x & widthmask;
+      mapx += mapx << 1;
+      f = *matrix;
+      sum0 += (float)src[ mapx + 0 ] * f;
+      sum1 += (float)src[ mapx + 1 ] * f;
+      sum2 += (float)src[ mapx + 2 ] * f;
+    }
+    matrix += state->rowreturn;
+  }
+
+  sum0 *= 1.0f/255.0f;
+  sum1 *= 1.0f/255.0f;
+  sum2 *= 1.0f/255.0f;
+  sum0 = 2.0f * ( sum0 - 0.5f );
+  sum1 = 2.0f * ( sum1 - 0.5f );
+  suminv = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
+  /* NOTE(review): for lengths >= 0.75 the XY values stay signed and negatives clamp to 0 below - confirm intended. */
+  if( suminv < 0.75f )
+  {
+    /* Tiny floor: a zero-length vector would give inf, then 0*inf = NaN and 255 output bytes. */
+    suminv = 0.5f / fmaxf( suminv, 1.0e-20f );
+    sum0 = 0.5f + ( sum0 * suminv );
+    sum1 = 0.5f + ( sum1 * suminv );
+  }
+  if( sum2 > 0.1f )
+  {
+    state->dithersum += sum2; /* running dither error carried between calls through state */
+    if( sum2 > 0.45f )
+      sum2 = 1.0f;
+    else if( ( sum2 < 0.3f ) && ( state->dithersum < 1.0f ) )
+      sum2 = 0.0f;
+    else
+      sum2 = ( ( sum2 + state->dithersum ) < 0.45f ? 0.0f : 1.0f );
+    state->dithersum -= sum2;
+  }
+  sum0 *= 255.0f;
+  sum1 *= 255.0f;
+  sum2 *= 255.0f;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+}
+
+/* Water-map reduction of one 4-byte pixel of a power-of-two image (coordinates wrap via bit
+ * masks): channels 0/1 are treated as a signed 2D vector, channel 2 is dithered to 0 or 1
+ * and channel 3 is the plain matrix-weighted average. */
+static void imStaticKernelPoT4Water( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int x, y, mapx, mapy, heightmask, widthmask;
+  int minx, maxx, miny, maxy;
+  float f, sum0, sum1, sum2, sum3, suminv;
+  float *matrix;
+  unsigned char *src;
+
+  minx = pointx;
+  maxx = minx + state->matrixsize;
+  miny = pointy;
+  maxy = miny + state->matrixsize;
+  heightmask = state->height - 1;
+  widthmask = state->width1 - 1;
+
+  sum0 = sum1 = sum2 = sum3 = 0.0f;
+  matrix = state->matrix;
+  for( y = miny ; y < maxy ; y++ )
+  {
+    mapy = y & heightmask;
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    for( x = minx ; x < maxx ; x++, matrix++ )
+    {
+      mapx = x & widthmask;
+      mapx <<= 2;
+      f = *matrix;
+      sum0 += (float)src[ mapx + 0 ] * f;
+      sum1 += (float)src[ mapx + 1 ] * f;
+      sum2 += (float)src[ mapx + 2 ] * f;
+      sum3 += (float)src[ mapx + 3 ] * f;
+    }
+    matrix += state->rowreturn;
+  }
+
+  sum0 *= 1.0f/255.0f;
+  sum1 *= 1.0f/255.0f;
+  sum2 *= 1.0f/255.0f;
+  sum0 = 2.0f * ( sum0 - 0.5f );
+  sum1 = 2.0f * ( sum1 - 0.5f );
+  suminv = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
+  /* NOTE(review): for lengths >= 0.75 the XY values stay signed and negatives clamp to 0 below - confirm intended. */
+  if( suminv < 0.75f )
+  {
+    /* Tiny floor: a zero-length vector would give inf, then 0*inf = NaN and 255 output bytes. */
+    suminv = 0.5f / fmaxf( suminv, 1.0e-20f );
+    sum0 = 0.5f + ( sum0 * suminv );
+    sum1 = 0.5f + ( sum1 * suminv );
+  }
+  if( sum2 > 0.1f )
+  {
+    state->dithersum += sum2; /* running dither error carried between calls through state */
+    if( sum2 > 0.45f )
+      sum2 = 1.0f;
+    else if( ( sum2 < 0.3f ) && ( state->dithersum < 1.0f ) )
+      sum2 = 0.0f;
+    else
+      sum2 = ( ( sum2 + state->dithersum ) < 0.45f ? 0.0f : 1.0f );
+    state->dithersum -= sum2;
+  }
+  sum0 *= 255.0f;
+  sum1 *= 255.0f;
+  sum2 *= 255.0f;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+}
+
+
+////
+
+
+/* Filters one 4-byte pixel of a power-of-two sized image: every channel is the plain
+ * matrix-weighted sum of the source bytes, and channel 3 is additionally scaled by 1.25
+ * before the result is rounded and clamped to 0..255.  Source coordinates wrap through
+ * bit masks, which only works when state->width1 and state->height are powers of two.
+ * Weights are read sequentially from state->matrix, skipping state->rowreturn floats
+ * after each row of the window. */
+static void imStaticKernelPoT4Plant( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
+{
+  int col, row, texel, wrapx, wrapy;
+  float weight, acc0, acc1, acc2, acc3;
+  float *weights;
+  unsigned char *line;
+
+  /* size - 1 acts as a wrap-around mask for coordinates. */
+  wrapy = state->height - 1;
+  wrapx = state->width1 - 1;
+
+  acc0 = acc1 = acc2 = acc3 = 0.0f;
+  weights = state->matrix;
+  for( row = 0 ; row < state->matrixsize ; row++ )
+  {
+    /* Source row for this window row, wrapped vertically. */
+    line = ADDRESS( state->srcdata, ( ( ( pointy + row ) & wrapy ) * state->bytesperline ) );
+    for( col = 0 ; col < state->matrixsize ; col++ )
+    {
+      /* Wrapped pixel column converted to a byte offset (4 bytes per pixel). */
+      texel = ( ( pointx + col ) & wrapx ) << 2;
+      weight = *weights++;
+      acc0 += (float)line[ texel + 0 ] * weight;
+      acc1 += (float)line[ texel + 1 ] * weight;
+      acc2 += (float)line[ texel + 2 ] * weight;
+      acc3 += (float)line[ texel + 3 ] * weight;
+    }
+    weights += state->rowreturn;
+  }
+
+  /* Channel 3 gets a fixed 1.25x gain before clamping. */
+  acc3 *= 1.25f;
+
+  /* Round to nearest (+0.5f then truncation) and clamp into 0..255. */
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acc0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acc1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acc2 + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, acc3 + 0.5f ) ) );
+}
+
+
+////
+
+
+int imReduceImageKaiserDataDivisor( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int sizedivisor, imReduceOptions *options )
+{
+ int filter, x, y, pointx, pointy, basex, basey, pow2flag;
+ int newwidth, newheight;
+ unsigned char *dst;
+ imStaticMatrixState state;
+ void (*applykernel)( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state );
+#if CPU_SSE2_SUPPORT
+ int corebase, corerange;
+ void (*applykernelcore)( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state );
+#endif
+
+ filter = options->filter;
+ imBuildStaticMatrix( &state, sizedivisor, options->hopcount, options->alpha );
+
+ newwidth = ( width < sizedivisor ) ? 1 : ( ( width + sizedivisor - 1 ) / sizedivisor );
+ newheight = ( height < sizedivisor ) ? 1 : ( ( height + sizedivisor - 1 ) / sizedivisor );
+
+ pow2flag = ccIsPow2Int32( width ) && ccIsPow2Int32( height );
+ applykernel = 0;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = 0;
+#endif
+
+ if( filter == IM_REDUCE_FILTER_LINEAR )
+ {
+ if( bytesperpixel == 4 )
+ {
+ applykernel = imStaticKernel4Linear;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imStaticKernel4Linear_Core;
+#endif
+ }
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3Linear;
+ else if( bytesperpixel == 2 )
+ applykernel = imStaticKernel2Linear;
+ else if( bytesperpixel == 1 )
+ applykernel = imStaticKernel1Linear;
+ }
+ else if( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ {
+ applykernel = imStaticKernel4LinearAlphaNorm;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imStaticKernel4LinearAlphaNorm_Core;
+#endif
+ }
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3Linear;
+ else if( bytesperpixel == 2 )
+ applykernel = imStaticKernel2Linear;
+ else if( bytesperpixel == 1 )
+ applykernel = imStaticKernel1Linear;
+ }
+ else if( filter == IM_REDUCE_FILTER_SRGB )
+ {
+ if( bytesperpixel == 4 )
+ {
+ applykernel = imStaticKernel4sRGB;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imStaticKernel4sRGB_Core;
+#endif
+ }
+ else if( bytesperpixel == 3 )
+ {
+ applykernel = imStaticKernel3sRGB;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imStaticKernel3sRGB_Core;
+#endif
+ }
+ else if( bytesperpixel == 2 )
+ applykernel = imStaticKernel2sRGB;
+ else if( bytesperpixel == 1 )
+ applykernel = imStaticKernel1sRGB;
+ }
+ else if( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ {
+ applykernel = imStaticKernel4sRGBAlphaNorm;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imStaticKernel4sRGBAlphaNorm_Core;
+#endif
+ }
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3sRGB;
+ else if( bytesperpixel == 2 )
+ applykernel = imStaticKernel2sRGB;
+ else if( bytesperpixel == 1 )
+ applykernel = imStaticKernel1sRGB;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imStaticKernel4Normal;
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3Normal;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imStaticKernel4NormalSustain;
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3NormalSustain;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imStaticKernel4NormalSustainAlphaNorm;
+ else if( bytesperpixel == 3 )
+ applykernel = imStaticKernel3NormalSustain;
+ }
+ else if( filter == IM_REDUCE_FILTER_WATERMAP )
+ {
+ if( ( bytesperpixel == 4 ) && ( pow2flag ) )
+ applykernel = imStaticKernelPoT4Water;
+ else if( ( bytesperpixel == 3 ) && ( pow2flag ) )
+ applykernel = imStaticKernelPoT3Water;
+ }
+ else if( filter == IM_REDUCE_FILTER_PLANTMAP )
+ {
+ if( ( bytesperpixel == 4 ) && ( pow2flag ) )
+ applykernel = imStaticKernelPoT4Plant;
+ }
+
+ if( !applykernel )
+ return 0;
+
+#if CPU_SSE2_SUPPORT
+ corebase = -state.matrixoffset;
+ corerange = ( newwidth + state.matrixoffset ) - corebase;
+#endif
+
+ state.dithersum = 0.0f;
+ if( ( newwidth | newheight ) > 2 )
+ state.dithersum = 0.5f;
+
+ state.srcdata = srcdata;
+ state.width1 = width * 1;
+ state.width2 = width * 2;
+ state.width3 = width * 3;
+ state.width4 = width * 4;
+ state.height = height;
+ state.bytesperline = bytesperline;
+
+ state.minimumalpha = 4;
+ state.minimumalphaf = (float)state.minimumalpha;
+ state.amplifynormal = fmaxf( 1.0f, options->amplifynormal );
+ state.normalsustainfactor = options->normalsustainfactor;
+
+ basex = ( state.matrixoffset + ( width << 8 ) ) % width;
+ basey = ( state.matrixoffset + ( height << 8 ) ) % height;
+ while( basex < 0 )
+ basex += width;
+ while( basey < 0 )
+ basey += height;
+
+#if CPU_SSE2_SUPPORT
+ if( applykernelcore )
+ {
+ dst = dstdata;
+ pointy = basey;
+ for( y = 0 ; y < newheight ; y++ )
+ {
+ pointx = basex;
+ for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
+ {
+ ( (unsigned int)( x - corebase ) < corerange ? applykernelcore : applykernel )( dst, pointx, pointy, &state );
+ pointx += sizedivisor;
+ while( pointx >= width )
+ pointx -= width;
+ }
+ pointy += sizedivisor;
+ while( pointy >= height )
+ pointy -= height;
+ }
+ }
+ else
+#endif
+ {
+ dst = dstdata;
+ pointy = basey;
+ for( y = 0 ; y < newheight ; y++ )
+ {
+ pointx = basex;
+ for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
+ {
+ applykernel( dst, pointx, pointy, &state );
+ pointx += sizedivisor;
+ while( pointx >= width )
+ pointx -= width;
+ }
+ pointy += sizedivisor;
+ while( pointy >= height )
+ pointy -= height;
+ }
+ }
+
+ imFreeStaticState( &state );
+
+ return 1;
+}
+
+
+/*
+ * Reduce imgsrc by an integer divisor into imgdst, allocating the destination pixels.
+ *
+ * The destination size is ceil(width/sizedivisor) x ceil(height/sizedivisor), each at least 1.
+ * imgdst->format copies the source type and bytesperpixel and uses tightly packed lines;
+ * imgdst->data is allocated with malloc() and is owned by the caller on success.
+ *
+ * Returns the result of imReduceImageKaiserDataDivisor(), or 0 when sizedivisor is not
+ * positive (imgdst is left untouched), when the allocation fails, or when the reduction
+ * fails. After an allocation or reduction failure imgdst->data is null.
+ */
+int imReduceImageKaiserDivisor( imgImage *imgdst, imgImage *imgsrc, int sizedivisor, imReduceOptions *options )
+{
+  int width, height;
+  int newwidth, newheight, retvalue;
+
+  /* A zero divisor would divide by zero below, a negative one has no meaning */
+  if( sizedivisor < 1 )
+    return 0;
+
+  width = imgsrc->format.width;
+  height = imgsrc->format.height;
+  newwidth = ( width < sizedivisor ) ? 1 : ( ( width + sizedivisor - 1 ) / sizedivisor );
+  newheight = ( height < sizedivisor ) ? 1 : ( ( height + sizedivisor - 1 ) / sizedivisor );
+
+  imgdst->format.width = newwidth;
+  imgdst->format.height = newheight;
+  imgdst->format.type = imgsrc->format.type;
+  imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
+  imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
+  /* Compute the byte count in size_t so large images do not overflow int */
+  imgdst->data = malloc( (size_t)imgdst->format.height * (size_t)imgdst->format.bytesperline );
+  if( !( imgdst->data ) )
+    return 0;
+
+  retvalue = imReduceImageKaiserDataDivisor( imgdst->data, imgsrc->data, width, height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, sizedivisor, options );
+  if( !retvalue )
+  {
+    /* Do not hand back a buffer of undefined pixels; a caller's free() of null stays safe */
+    free( imgdst->data );
+    imgdst->data = 0;
+  }
+
+  return retvalue;
+}
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+/*
+ * Weighted average of a 1-byte-per-pixel neighbourhood, written to dst[0].
+ *
+ * The neighbourhood is matrixsizex x matrixsizey source pixels starting at
+ * (matrixoffsetx, matrixoffsety); a coordinate that reaches the width/height restarts
+ * at 0. Each tap is weighted by linearx[column] * lineary[row] and the sum is divided
+ * by the total weight, rounded (+0.5) and clamped to 0..255.
+ */
+static void imDynamicKernel1Linear( unsigned char *dst, imGenericMatrixState *state )
+{
+  int col, row, tapx, tapy;
+  float weight, rowweight, accum, weightsum;
+  unsigned char *line;
+
+  accum = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    tapx = state->matrixoffsetx;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      accum += weight * (float)line[ tapx ];
+      weightsum += weight;
+      /* Step one pixel right, restarting at the line start past the end */
+      tapx = ( ( tapx + 1 ) >= state->width1 ) ? 0 : ( tapx + 1 );
+    }
+    /* Step one line down, restarting at the first line past the bottom */
+    tapy = ( ( tapy + 1 ) >= state->height ) ? 0 : ( tapy + 1 );
+  }
+
+  accum /= weightsum;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accum + 0.5f ) ) );
+}
+
+
+/*
+ * Weighted average of a 2-bytes-per-pixel neighbourhood, written to dst[0..1].
+ *
+ * Both channels are averaged independently over matrixsizex x matrixsizey taps starting
+ * at (matrixoffsetx, matrixoffsety), with wrap to 0 at the end of a line / of the image.
+ * Tap weight is linearx[column] * lineary[row]; results are normalised by the total
+ * weight, rounded (+0.5) and clamped to 0..255.
+ */
+static void imDynamicKernel2Linear( unsigned char *dst, imGenericMatrixState *state )
+{
+  int col, row, ch, tapx, tapy;
+  float weight, rowweight, weightsum, scale;
+  float accum[2];
+  unsigned char *line;
+
+  accum[0] = 0.0f;
+  accum[1] = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* tapx is a byte offset within the line: 2 bytes per pixel */
+    tapx = state->matrixoffsetx << 1;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      for( ch = 0 ; ch < 2 ; ch++ )
+        accum[ch] += weight * (float)line[ tapx + ch ];
+      weightsum += weight;
+      tapx += 2;
+      if( tapx >= state->width2 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = 1.0f / weightsum;
+  for( ch = 0 ; ch < 2 ; ch++ )
+  {
+    accum[ch] *= scale;
+    dst[ch] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accum[ch] + 0.5f ) ) );
+  }
+}
+
+
+/*
+ * Weighted average of a 3-bytes-per-pixel neighbourhood, written to dst[0..2].
+ *
+ * The three channels are averaged independently over matrixsizex x matrixsizey taps
+ * starting at (matrixoffsetx, matrixoffsety), with wrap to 0 at the end of a line / of
+ * the image. Tap weight is linearx[column] * lineary[row]; results are normalised by
+ * the total weight, rounded (+0.5) and clamped to 0..255.
+ */
+static void imDynamicKernel3Linear( unsigned char *dst, imGenericMatrixState *state )
+{
+  int col, row, ch, tapx, tapy;
+  float weight, rowweight, weightsum, scale;
+  float accum[3];
+  unsigned char *line;
+
+  for( ch = 0 ; ch < 3 ; ch++ )
+    accum[ch] = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* Byte offset of the first tap: matrixoffsetx * 3 */
+    tapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      for( ch = 0 ; ch < 3 ; ch++ )
+        accum[ch] += weight * (float)line[ tapx + ch ];
+      weightsum += weight;
+      tapx += 3;
+      if( tapx >= state->width3 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = 1.0f / weightsum;
+  for( ch = 0 ; ch < 3 ; ch++ )
+  {
+    accum[ch] *= scale;
+    dst[ch] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accum[ch] + 0.5f ) ) );
+  }
+}
+
+
+/*
+ * Weighted average of a 4-bytes-per-pixel neighbourhood, written to dst[0..3].
+ *
+ * All four channels (the fourth included) are averaged independently and identically
+ * over matrixsizex x matrixsizey taps starting at (matrixoffsetx, matrixoffsety), with
+ * wrap to 0 at the end of a line / of the image. Tap weight is
+ * linearx[column] * lineary[row]; results are normalised by the total weight, rounded
+ * (+0.5) and clamped to 0..255.
+ */
+static void imDynamicKernel4Linear( unsigned char *dst, imGenericMatrixState *state )
+{
+  int col, row, ch, tapx, tapy;
+  float weight, rowweight, weightsum, scale;
+  float accum[4];
+  unsigned char *line;
+
+  for( ch = 0 ; ch < 4 ; ch++ )
+    accum[ch] = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* tapx is a byte offset within the line: 4 bytes per pixel */
+    tapx = state->matrixoffsetx << 2;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      for( ch = 0 ; ch < 4 ; ch++ )
+        accum[ch] += weight * (float)line[ tapx + ch ];
+      weightsum += weight;
+      tapx += 4;
+      if( tapx >= state->width4 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = 1.0f / weightsum;
+  for( ch = 0 ; ch < 4 ; ch++ )
+  {
+    accum[ch] *= scale;
+    dst[ch] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accum[ch] + 0.5f ) ) );
+  }
+}
+
+
+////
+
+
+/*
+ * Alpha-weighted average of a 4-bytes-per-pixel neighbourhood, written to dst[0..3].
+ *
+ * Each tap contributes its first three channels weighted by (tap weight * fourth byte),
+ * so pixels with a zero fourth byte do not bleed into the result. The fourth channel
+ * itself is the plain weighted average. If that average is below state->minimumalphaf
+ * the output pixel is all zeroes; otherwise the three colour sums are divided by it,
+ * rounded (+0.5) and clamped to 0..255.
+ */
+static void imDynamicKernel4LinearAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
+{
+  int col, row, ch, tapx, tapy;
+  float weight, rowweight, weightsum, coverage, scale;
+  float accum[4];
+  unsigned char *line;
+
+  for( ch = 0 ; ch < 4 ; ch++ )
+    accum[ch] = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    tapx = state->matrixoffsetx << 2;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      /* Fourth byte scaled by the tap weight: both the alpha sample and the colour weight */
+      coverage = (float)line[ tapx + 3 ] * weight;
+      for( ch = 0 ; ch < 3 ; ch++ )
+        accum[ch] += coverage * (float)line[ tapx + ch ];
+      accum[3] += coverage;
+      weightsum += weight;
+      tapx += 4;
+      if( tapx >= state->width4 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = 1.0f / weightsum;
+  for( ch = 0 ; ch < 4 ; ch++ )
+    accum[ch] *= scale;
+
+  /* Too transparent to carry a meaningful colour (also taken when the average is NaN) */
+  if( !( accum[3] >= state->minimumalphaf ) )
+  {
+    for( ch = 0 ; ch < 4 ; ch++ )
+      dst[ch] = 0;
+    return;
+  }
+
+  scale = 1.0f / accum[3];
+  for( ch = 0 ; ch < 3 ; ch++ )
+    dst[ch] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( accum[ch] * scale ) + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, accum[3] + 0.5f ) ) );
+}
+
+
+#if CPU_SSE2_SUPPORT
+
+/*
+ * SSE2 counterpart of imDynamicKernel4LinearAlphaNorm for 4-bytes-per-pixel data:
+ * accumulates the alpha-weighted colour sums and the alpha sum four horizontally
+ * adjacent pixels at a time, then writes one 4-byte pixel to dst.
+ *
+ * Differences from the scalar kernel that are visible here:
+ *  - the horizontal byte offset is never wrapped and 16 bytes are loaded per step, so the
+ *    footprint (matrixsizex rounded up to a multiple of 4 pixels) must be readable in one run.
+ *    NOTE(review): presumably only used where the footprint lies inside the line - confirm
+ *    against the caller.
+ *  - state->linearx is read four entries at a time with _mm_load_ps, which requires
+ *    16-byte alignment. NOTE(review): presumably linearx is padded to a multiple of 4
+ *    entries - confirm where it is built.
+ *  - the colour division uses _mm_rcp_ps (an approximate reciprocal) and the final
+ *    conversion uses _mm_cvtps_epi32, so low-order results may differ from the scalar path.
+ */
+static void imDynamicKernel4LinearAlphaNorm_Core( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+ uint32_t pixel;
+ unsigned char *src;
+ __m128 vmatrixsum, vsum0, vsum1, vsum2, vsum3;
+ __m128 vlx, vly, vf, valpha, vr, vg, vb, va, vsrcf;
+ __m128i vsrc, vshufmask;
+ __m128i vzero;
+
+ #if CPU_SSSE3_SUPPORT
+ /* Byte shuffle that gathers the 4 pixels' bytes as RRRR,GGGG,BBBB,AAAA */
+ vshufmask = _mm_setr_epi8( 0x00,0x04,0x08,0x0c, 0x01,0x05,0x09,0x0d, 0x02,0x06,0x0a,0x0e, 0x03,0x07,0x0b,0x0f );
+ #endif
+ vsum0 = _mm_setzero_ps();
+ vsum1 = _mm_setzero_ps();
+ vsum2 = _mm_setzero_ps();
+ vsum3 = _mm_setzero_ps();
+ vmatrixsum = _mm_setzero_ps();
+ vzero = _mm_castps_si128( _mm_setzero_ps() );
+
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ mapx = state->matrixoffsetx << 2;
+ vly = _mm_set1_ps( state->lineary[y] );
+ for( x = 0 ; x < state->matrixsizex ; x += 4 )
+ {
+ vlx = _mm_load_ps( &state->linearx[x] );
+ /* Load 16 bytes and unpack as RRRR,GGGG,BBBB,AAAA in one SSE register */
+ vsrc = _mm_loadu_si128( (void *)&src[ mapx ] );
+ #if CPU_SSSE3_SUPPORT
+ vsrc = _mm_shuffle_epi8( vsrc, vshufmask );
+ #else
+ /* SSE2 fallback: vshufmask is reused as scratch for the dword-rotated pixels */
+ vshufmask = _mm_shuffle_epi32( vsrc, 0x39 );
+ vsrc = _mm_unpacklo_epi16( _mm_unpacklo_epi8( vsrc, vshufmask ), _mm_unpackhi_epi8( vsrc, vshufmask ) );
+ #endif
+ /* Break that into 4 SSE registers as floats: vR,vG,vB,vA */
+ /* NOTE(review): CPU_CVT_U8_TO_I32 presumably zero-extends the low 4 bytes to 4 int32 lanes - confirm the macro */
+ vsrcf = _mm_castsi128_ps( vsrc );
+ vr = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( vsrcf ), vzero ) );
+ #if CPU_SSE3_SUPPORT
+ vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehdup_ps( vsrcf ) ), vzero ) );
+ #else
+ vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0x55 ) ), vzero ) );
+ #endif
+ vb = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehl_ps( vsrcf, vsrcf ) ), vzero ) );
+ va = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0xff ) ), vzero ) );
+ /* Per-pixel tap weight, and alpha scaled by that weight (the colour weight) */
+ vf = _mm_mul_ps( vlx, vly );
+ valpha = _mm_mul_ps( va, vf );
+ vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( vr, valpha ) );
+ vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( vg, valpha ) );
+ vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( vb, valpha ) );
+ vsum3 = _mm_add_ps( vsum3, valpha );
+ vmatrixsum = _mm_add_ps( vmatrixsum, vf );
+ /* Advance 4 pixels; no wrap check on this path */
+ mapx += 16;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+ /* Horizontal reduction: every lane of vmatrixsum ends up holding the total weight */
+ #if CPU_SSE3_SUPPORT
+ vmatrixsum = _mm_hadd_ps( vmatrixsum, vmatrixsum );
+ vmatrixsum = _mm_hadd_ps( vmatrixsum, vmatrixsum );
+ #else
+ vmatrixsum = _mm_add_ps( vmatrixsum, _mm_shuffle_ps( vmatrixsum, vmatrixsum, 0x4e ) );
+ vmatrixsum = _mm_add_ps( vmatrixsum, _mm_shuffle_ps( vmatrixsum, vmatrixsum, 0x39 ) );
+ #endif
+
+ /* Horizontal reduction of the four accumulators: vsum0 becomes (sum0, sum1, sum2, sum3) */
+ #if CPU_SSE3_SUPPORT
+ vsum0 = _mm_hadd_ps( vsum0, vsum1 );
+ vsum2 = _mm_hadd_ps( vsum2, vsum3 );
+ vsum0 = _mm_hadd_ps( vsum0, vsum2 );
+ #else
+ vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum2 ), _mm_unpackhi_ps( vsum0, vsum2 ) );
+ vsum1 = _mm_add_ps( _mm_unpacklo_ps( vsum1, vsum3 ), _mm_unpackhi_ps( vsum1, vsum3 ) );
+ vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum1 ), _mm_unpackhi_ps( vsum0, vsum1 ) );
+ #endif
+ vsum0 = _mm_div_ps( vsum0, vmatrixsum );
+
+ /* Broadcast the averaged alpha (lane 3); the pixel stays all-zero if it is below the threshold */
+ valpha = _mm_shuffle_ps( vsum0, vsum0, 0xff );
+ pixel = 0;
+ if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
+ {
+ __m128i vpixel;
+ /* Divide all lanes by alpha using the approximate reciprocal */
+ vsum0 = _mm_mul_ps( vsum0, _mm_rcp_ps( valpha ) );
+ /* NOTE(review): presumably restores the undivided alpha in lane 3 - confirm CPU_BLENDV_PS / simd4fAlphaMask */
+ vsum0 = CPU_BLENDV_PS( vsum0, valpha, *(__m128 *)simd4fAlphaMask );
+ /* Float to int32, then saturating packs down to 4 unsigned bytes */
+ vpixel = _mm_cvtps_epi32( vsum0 );
+ vpixel = _mm_packs_epi32( vpixel, vpixel );
+ vpixel = _mm_packus_epi16( vpixel, vpixel );
+ pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
+ }
+ /* Stores all 4 bytes in one 32-bit write through dst */
+ *(uint32_t *)dst = pixel;
+
+ return;
+}
+
+#endif
+
+
+////
+
+
+/*
+ * Weighted average of a 1-byte-per-pixel neighbourhood, with each sample passed through
+ * an sRGB-to-linear helper before weighting and the average passed through the
+ * linear-to-sRGB helper before being written to dst[0].
+ *
+ * Taps, weights (linearx[x] * lineary[y]) and wrap-around are the same as in
+ * imDynamicKernel1Linear. The accumulation is done either in lane 0 of an SSE register
+ * (CPU_SSE2_SUPPORT) or in a scalar float; the total weight is always a scalar.
+ * NOTE(review): srgb2linear3()/linear2srgb3() and srgb2linear()/linear2srgb() are defined
+ * elsewhere; their exact value ranges are not visible here.
+ */
+static void imDynamicKernel1sRGB( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float sum0;
+#endif
+ float f, matrixsum;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+#endif
+ matrixsum = 0.0f;
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ mapx = state->matrixoffsetx;
+ for( x = 0 ; x < state->matrixsizex ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ f = state->linearx[x] * state->lineary[y];
+ /* Sample goes in lane 0, the other lanes are zero */
+ vsrc = _mm_set_ss( (float)src[ mapx + 0 ] );
+ vsrc = srgb2linear3( vsrc );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
+#else
+ f = state->linearx[x] * state->lineary[y];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+#endif
+ matrixsum += f;
+ /* Next pixel, restarting at the line start past the end */
+ mapx++;
+ if( mapx >= state->width1 )
+ mapx = 0;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ /* Normalise, convert back, then int32 -> saturating packs to bytes; only the low byte is kept */
+ vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
+ dst[0] = _mm_cvtsi128_si32( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) );
+#else
+ sum0 /= matrixsum;
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+
+/*
+ * Weighted average of a 2-bytes-per-pixel neighbourhood, with both channels passed
+ * through the sRGB-to-linear helper before weighting and through the linear-to-sRGB
+ * helper before being written to dst[0..1].
+ *
+ * Taps, weights (linearx[x] * lineary[y]) and wrap-around are the same as in
+ * imDynamicKernel2Linear. With CPU_SSE2_SUPPORT the two channels are carried in lanes
+ * 0 and 1 of one SSE register; otherwise two scalar floats are used.
+ * NOTE(review): both channels get the same sRGB treatment in both paths, i.e. the
+ * second byte is not handled as a linear alpha - confirm that is intended for 2-channel data.
+ */
+static void imDynamicKernel2sRGB( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float sum0, sum1;
+#endif
+ float f, matrixsum;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+#endif
+ matrixsum = 0.0f;
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ /* Byte offset within the line: 2 bytes per pixel */
+ mapx = state->matrixoffsetx << 1;
+ for( x = 0 ; x < state->matrixsizex ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ f = state->linearx[x] * state->lineary[y];
+ /* _mm_set_ps lists lanes high to low: channel 0 lands in lane 0, channel 1 in lane 1 */
+ vsrc = _mm_set_ps( 0.0f, 0.0f, (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
+ vsrc = srgb2linear3( vsrc );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
+#else
+ f = state->linearx[x] * state->lineary[y];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+#endif
+ matrixsum += f;
+ mapx += 2;
+ if( mapx >= state->width2 )
+ mapx = 0;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ /* Scratch word: the packed result is stored as 4 bytes, of which only the first 2 are copied out */
+ union
+ {
+ char c[4];
+ uint32_t i;
+ } u;
+ vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
+ /* int32 conversion followed by saturating packs down to unsigned bytes */
+ _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+ dst[0] = u.c[0];
+ dst[1] = u.c[1];
+#else
+ matrixsum = 1.0f / matrixsum;
+ sum0 *= matrixsum;
+ sum1 *= matrixsum;
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+
+/*
+ * Weighted average of a 3-bytes-per-pixel neighbourhood, with all three channels passed
+ * through the sRGB-to-linear helper before weighting and through the linear-to-sRGB
+ * helper before being written to dst[0..2].
+ *
+ * Taps, weights (linearx[x] * lineary[y]) and wrap-around are the same as in
+ * imDynamicKernel3Linear. With CPU_SSE2_SUPPORT the channels are carried in lanes 0..2
+ * of one SSE register (lane 3 is fed zero); otherwise three scalar floats are used.
+ */
+static void imDynamicKernel3sRGB( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float sum0, sum1, sum2;
+#endif
+ float f, matrixsum;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+#endif
+ matrixsum = 0.0f;
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ /* Byte offset within the line: matrixoffsetx * 3 */
+ mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
+ for( x = 0 ; x < state->matrixsizex ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ f = state->linearx[x] * state->lineary[y];
+ /* _mm_set_ps lists lanes high to low: channels 0,1,2 land in lanes 0,1,2 */
+ vsrc = _mm_set_ps( 0.0f, (float)src[ mapx + 2 ], (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
+ vsrc = srgb2linear3( vsrc );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
+#else
+ f = state->linearx[x] * state->lineary[y];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
+#endif
+ matrixsum += f;
+ mapx += 3;
+ if( mapx >= state->width3 )
+ mapx = 0;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ /* Scratch word: the packed result is 4 bytes but only 3 may be written to dst */
+ union
+ {
+ char c[4];
+ uint32_t i;
+ } u;
+ vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
+ /* int32 conversion followed by saturating packs down to unsigned bytes */
+ _mm_store_ss( (void *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+ dst[0] = u.c[0];
+ dst[1] = u.c[1];
+ dst[2] = u.c[2];
+#else
+ matrixsum = 1.0f / matrixsum;
+ sum0 *= matrixsum;
+ sum1 *= matrixsum;
+ sum2 *= matrixsum;
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+
+/*
+ * Weighted average of a 4-bytes-per-pixel neighbourhood with sRGB-aware colour: the
+ * first three channels go through the sRGB-to-linear helper before weighting and the
+ * linear-to-sRGB helper afterwards; in the scalar path the fourth channel is averaged
+ * as-is. The result is written to dst[0..3].
+ *
+ * Taps, weights (linearx[x] * lineary[y]) and wrap-around are the same as in
+ * imDynamicKernel4Linear.
+ * NOTE(review): the SSE path runs all four lanes through srgb2linear3()/linear2srgb3();
+ * presumably those helpers leave lane 3 untouched so it matches the scalar path - confirm.
+ */
+static void imDynamicKernel4sRGB( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc;
+ __m128i vzero;
+#else
+ float sum0, sum1, sum2, sum3;
+#endif
+ float f, matrixsum;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+ sum3 = 0.0f;
+#endif
+ matrixsum = 0.0f;
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ /* Byte offset within the line: 4 bytes per pixel */
+ mapx = state->matrixoffsetx << 2;
+ for( x = 0 ; x < state->matrixsizex ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ f = state->linearx[x] * state->lineary[y];
+ /* Fetch the 4 bytes of one pixel with a 32-bit load, then widen them to 4 floats */
+ /* NOTE(review): CPU_CVT_U8_TO_I32 presumably zero-extends the 4 bytes to 4 int32 lanes - confirm the macro */
+ vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
+ vsrc = srgb2linear3( vsrc );
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
+#else
+ f = state->linearx[x] * state->lineary[y];
+ sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
+ /* Fourth channel is averaged without any sRGB conversion */
+ sum3 += f * (float)src[ mapx + 3 ];
+#endif
+ matrixsum += f;
+ mapx += 4;
+ if( mapx >= state->width4 )
+ mapx = 0;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
+ /* int32 conversion, saturating packs to bytes, then one 32-bit store of the whole pixel */
+ _mm_store_ss( (void *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
+#else
+ matrixsum = 1.0f / matrixsum;
+ sum0 *= matrixsum;
+ sum1 *= matrixsum;
+ sum2 *= matrixsum;
+ sum3 *= matrixsum;
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
+ dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+#endif
+
+ return;
+}
+
+
+////
+
+
+/*
+ * Alpha-weighted, sRGB-aware average of a 4-bytes-per-pixel neighbourhood, written to
+ * dst[0..3].
+ *
+ * Each tap's first three channels are converted with the sRGB-to-linear helper and
+ * weighted by (tap weight * fourth byte); the fourth channel is the plain weighted
+ * average. If that averaged alpha is below state->minimumalphaf the output pixel is all
+ * zeroes; otherwise the colour sums are divided by it and converted back with the
+ * linear-to-sRGB helper.
+ *
+ * The SSE path divides with _mm_rcp_ps (an approximate reciprocal), so its low-order
+ * results may differ from the scalar path.
+ * NOTE(review): the SSE path assumes srgb2linear3()/linear2srgb3() leave lane 3 (alpha)
+ * untouched - confirm against their definitions.
+ */
+static void imDynamicKernel4sRGBAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
+{
+ int x, y, mapx, mapy;
+#if CPU_SSE2_SUPPORT
+ __m128 vsum, vsrc, valpha;
+ __m128i vzero;
+ uint32_t pixel;
+#else
+ float sum0, sum1, sum2, sum3, alpha;
+#endif
+ float f, matrixsum;
+ unsigned char *src;
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_setzero_ps();
+ vzero = _mm_setzero_si128();
+#else
+ sum0 = 0.0f;
+ sum1 = 0.0f;
+ sum2 = 0.0f;
+ sum3 = 0.0f;
+#endif
+ matrixsum = 0.0f;
+ mapy = state->matrixoffsety;
+ for( y = 0 ; y < state->matrixsizey ; y++ )
+ {
+ src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+ /* Byte offset within the line: 4 bytes per pixel */
+ mapx = state->matrixoffsetx << 2;
+ for( x = 0 ; x < state->matrixsizex ; x++ )
+ {
+#if CPU_SSE2_SUPPORT
+ f = state->linearx[x] * state->lineary[y];
+ /* Fetch the 4 bytes of one pixel with a 32-bit load, then widen them to 4 floats */
+ vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
+ /* valpha = (a, a, 1, 1); the 0xC0 shuffle below turns it into (a, a, a, 1) */
+ valpha = _mm_shuffle_ps( vsrc, _mm_set_ss( 1.0f ), 0x0f );
+ vsrc = srgb2linear3( vsrc );
+ /* Lanes 0..2 accumulate a*f*colour, lane 3 accumulates f*a */
+ vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha, valpha, 0xC0 ), _mm_set1_ps( f ) ), vsrc ) );
+#else
+ f = state->linearx[x] * state->lineary[y];
+ /* Fourth byte scaled by the tap weight: both the alpha sample and the colour weight */
+ alpha = (float)src[ mapx + 3 ] * f;
+ sum0 += alpha * srgb2linear( (float)src[ mapx + 0 ] );
+ sum1 += alpha * srgb2linear( (float)src[ mapx + 1 ] );
+ sum2 += alpha * srgb2linear( (float)src[ mapx + 2 ] );
+ sum3 += alpha;
+#endif
+ matrixsum += f;
+ mapx += 4;
+ if( mapx >= state->width4 )
+ mapx = 0;
+ }
+ mapy++;
+ if( mapy >= state->height )
+ mapy = 0;
+ }
+
+#if CPU_SSE2_SUPPORT
+ vsum = _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) );
+ /* Broadcast the averaged alpha (lane 3); the pixel stays all-zero if it is below the threshold */
+ valpha = _mm_shuffle_ps( vsum, vsum, 0xff );
+ pixel = 0;
+ if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
+ {
+ __m128i vpixel;
+ /* Divide all lanes by alpha using the approximate reciprocal */
+ vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha ) );
+ /* NOTE(review): presumably restores the undivided alpha in lane 3 - confirm CPU_BLENDV_PS / simd4fAlphaMask */
+ vsum = CPU_BLENDV_PS( vsum, valpha, *(__m128 *)simd4fAlphaMask );
+ vsum = linear2srgb3( vsum );
+ /* Float to int32, then saturating packs down to 4 unsigned bytes */
+ vpixel = _mm_cvtps_epi32( vsum );
+ vpixel = _mm_packs_epi32( vpixel, vpixel );
+ vpixel = _mm_packus_epi16( vpixel, vpixel );
+ pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
+ }
+ /* Stores all 4 bytes in one 32-bit write through dst */
+ *(uint32_t *)dst = pixel;
+#else
+ matrixsum = 1.0f / matrixsum;
+ sum0 *= matrixsum;
+ sum1 *= matrixsum;
+ sum2 *= matrixsum;
+ sum3 *= matrixsum;
+ if( sum3 >= state->minimumalphaf )
+ {
+ f = 1.0f / sum3;
+ dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * f ) + 0.5f ) ) );
+ dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * f ) + 0.5f ) ) );
+ dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * f ) + 0.5f ) ) );
+ dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+ }
+ else
+ {
+ dst[0] = 0;
+ dst[1] = 0;
+ dst[2] = 0;
+ dst[3] = 0;
+ }
+#endif
+
+ return;
+}
+
+
+////
+
+
+/*
+ * Weighted average of a 3-bytes-per-pixel normal map neighbourhood, renormalised and
+ * written to dst[0..2].
+ *
+ * The three channels are averaged like imDynamicKernel3Linear, scaled to 0..1 and
+ * recentred around 0 (-0.5). The first two components are multiplied by
+ * state->amplifynormal, the vector is rescaled to length 0.5 and re-encoded as
+ * 127.5 + component*255, rounded (+0.5) and clamped to 0..255.
+ *
+ * A zero-length averaged vector is written as the neutral (128,128,128) instead of
+ * dividing by zero, which would produce NaN and clamp every channel to 255.
+ */
+static void imDynamicKernel3Normal( unsigned char *dst, imGenericMatrixState *state )
+{
+  int x, y, mapx, mapy;
+  float f, sum0, sum1, sum2;
+  float matrixsum, length, suminv;
+  unsigned char *src;
+
+  sum0 = 0.0f;
+  sum1 = 0.0f;
+  sum2 = 0.0f;
+  matrixsum = 0.0f;
+  mapy = state->matrixoffsety;
+  for( y = 0 ; y < state->matrixsizey ; y++ )
+  {
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    /* Byte offset within the line: matrixoffsetx * 3 */
+    mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
+    for( x = 0 ; x < state->matrixsizex ; x++ )
+    {
+      f = state->linearx[x] * state->lineary[y];
+      sum0 += f * (float)src[ mapx + 0 ];
+      sum1 += f * (float)src[ mapx + 1 ];
+      sum2 += f * (float)src[ mapx + 2 ];
+      matrixsum += f;
+      mapx += 3;
+      if( mapx >= state->width3 )
+        mapx = 0;
+    }
+    mapy++;
+    if( mapy >= state->height )
+      mapy = 0;
+  }
+
+  /* Normalise by the total weight and map 0..255 to 0..1 in one factor */
+  matrixsum = (1.0f/255.0f) / matrixsum;
+  sum0 *= matrixsum;
+  sum1 *= matrixsum;
+  sum2 *= matrixsum;
+  sum0 -= 0.5f;
+  sum1 -= 0.5f;
+  sum2 -= 0.5f;
+  sum0 *= state->amplifynormal;
+  sum1 *= state->amplifynormal;
+  length = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
+  /* Guard the degenerate vector: 0 * (x/0) would be NaN */
+  suminv = ( length > 0.0f ) ? ( (0.5f*255.0f) / length ) : 0.0f;
+  sum0 = (0.5f*255.0f) + ( sum0 * suminv );
+  sum1 = (0.5f*255.0f) + ( sum1 * suminv );
+  sum2 = (0.5f*255.0f) + ( sum2 * suminv );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+
+  return;
+}
+
+
+/*
+ * Weighted average of a 4-bytes-per-pixel normal map neighbourhood, written to dst[0..3].
+ *
+ * The first three channels are averaged, scaled to 0..1, recentred around 0 (-0.5), the
+ * first two multiplied by state->amplifynormal, then the vector is rescaled to length
+ * 0.5 and re-encoded as 127.5 + component*255. The fourth channel is a plain weighted
+ * average. All outputs are rounded (+0.5) and clamped to 0..255.
+ *
+ * A zero-length averaged vector is written as the neutral (128,128,128) instead of
+ * dividing by zero, which would produce NaN and clamp every normal channel to 255.
+ */
+static void imDynamicKernel4Normal( unsigned char *dst, imGenericMatrixState *state )
+{
+  int x, y, mapx, mapy;
+  float f, sum0, sum1, sum2, sum3;
+  float matrixsum, length, suminv;
+  unsigned char *src;
+
+  sum0 = 0.0f;
+  sum1 = 0.0f;
+  sum2 = 0.0f;
+  sum3 = 0.0f;
+  matrixsum = 0.0f;
+  mapy = state->matrixoffsety;
+  for( y = 0 ; y < state->matrixsizey ; y++ )
+  {
+    src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
+    /* Byte offset within the line: 4 bytes per pixel */
+    mapx = state->matrixoffsetx << 2;
+    for( x = 0 ; x < state->matrixsizex ; x++ )
+    {
+      f = state->linearx[x] * state->lineary[y];
+      sum0 += f * (float)src[ mapx + 0 ];
+      sum1 += f * (float)src[ mapx + 1 ];
+      sum2 += f * (float)src[ mapx + 2 ];
+      sum3 += f * (float)src[ mapx + 3 ];
+      matrixsum += f;
+      mapx += 4;
+      if( mapx >= state->width4 )
+        mapx = 0;
+    }
+    mapy++;
+    if( mapy >= state->height )
+      mapy = 0;
+  }
+
+  /* Normalise by the total weight and map 0..255 to 0..1 in one factor */
+  matrixsum = (1.0f/255.0f) / matrixsum;
+  sum0 *= matrixsum;
+  sum1 *= matrixsum;
+  sum2 *= matrixsum;
+  sum3 *= matrixsum;
+  sum0 -= 0.5f;
+  sum1 -= 0.5f;
+  sum2 -= 0.5f;
+  sum0 *= state->amplifynormal;
+  sum1 *= state->amplifynormal;
+  length = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
+  /* Guard the degenerate vector: 0 * (x/0) would be NaN */
+  suminv = ( length > 0.0f ) ? ( (0.5f*255.0f) / length ) : 0.0f;
+  sum0 = (0.5f*255.0f) + ( sum0 * suminv );
+  sum1 = (0.5f*255.0f) + ( sum1 * suminv );
+  sum2 = (0.5f*255.0f) + ( sum2 * suminv );
+  /* Fourth channel back from 0..1 to 0..255 */
+  sum3 *= 255.0f;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
+
+  return;
+}
+
+
+////
+
+
+/*
+ * Normal-map average for 3-bytes-per-pixel data that tries to keep the tilt of the
+ * source normals, written to dst[0..2].
+ *
+ * Besides the weighted sum of the recentred (-127.5) components, the loop accumulates for
+ * every tap the ratio of the weighted vector's first-two-component length to its full
+ * length (skipped when that planar length is zero). After the average has been amplified
+ * (state->amplifynormal on the first two components) and scaled to length 127.5, its
+ * planar length is compared with that accumulated ratio times state->normalsustainfactor;
+ * if smaller, the first two components are boosted (by at most 8x) and the vector is
+ * rescaled again. The length used for rescaling is floored at 0.0625.
+ */
+static void imDynamicKernel3NormalSustain( unsigned char *dst, imGenericMatrixState *state )
+{
+  const float halfrange = (0.5f*255.0f);
+  int col, row, tapx, tapy;
+  float weight, rowweight, weightsum, scale, boost;
+  float tx, ty, tz, nx, ny, nz;
+  float planar, sustain;
+  unsigned char *line;
+
+  nx = 0.0f;
+  ny = 0.0f;
+  nz = 0.0f;
+  sustain = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* Byte offset within the line: matrixoffsetx * 3 */
+    tapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      tx = weight * ( (float)line[ tapx + 0 ] - 127.5f );
+      ty = weight * ( (float)line[ tapx + 1 ] - 127.5f );
+      tz = weight * ( (float)line[ tapx + 2 ] - 127.5f );
+      nx += tx;
+      ny += ty;
+      nz += tz;
+      /* Squared planar length of this tap; contributes its tilt ratio when non-zero */
+      planar = ( tx * tx ) + ( ty * ty );
+      if( planar != 0.0f )
+        sustain += sqrtf( planar ) / sqrtf( planar + ( tz * tz ) );
+      weightsum += weight;
+      tapx += 3;
+      if( tapx >= state->width3 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = (1.0f/255.0f) / weightsum;
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+  nx *= state->amplifynormal;
+  ny *= state->amplifynormal;
+  scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+
+  planar = sqrtf( ( nx * nx ) + ( ny * ny ) );
+  sustain *= state->normalsustainfactor;
+  if( planar < sustain )
+  {
+    /* Averaging flattened the normal: push the planar part back out, capped at 8x */
+    boost = fminf( sustain / planar, 8.0f );
+    nx *= boost;
+    ny *= boost;
+    scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+    nx *= scale;
+    ny *= scale;
+    nz *= scale;
+  }
+
+  nx += halfrange;
+  ny += halfrange;
+  nz += halfrange;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ny + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nz + 0.5f ) ) );
+}
+
+/*
+ * Normal-map average for 4-bytes-per-pixel data that tries to keep the tilt of the
+ * source normals, written to dst[0..3].
+ *
+ * The first three channels are handled as in imDynamicKernel3NormalSustain: weighted sum
+ * of the recentred (-127.5) components, amplification of the first two by
+ * state->amplifynormal, rescale to length 127.5 (length floored at 0.0625), then an
+ * optional boost of the planar part (at most 8x) when it is shorter than the accumulated
+ * per-tap tilt ratio times state->normalsustainfactor. The fourth channel is a plain
+ * weighted average.
+ */
+static void imDynamicKernel4NormalSustain( unsigned char *dst, imGenericMatrixState *state )
+{
+  const float halfrange = (0.5f*255.0f);
+  int col, row, tapx, tapy;
+  float weight, rowweight, weightsum, scale, boost;
+  float tx, ty, tz, tw, nx, ny, nz, nw;
+  float planar, sustain;
+  unsigned char *line;
+
+  nx = 0.0f;
+  ny = 0.0f;
+  nz = 0.0f;
+  nw = 0.0f;
+  sustain = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* Byte offset within the line: 4 bytes per pixel */
+    tapx = state->matrixoffsetx << 2;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      tx = weight * ( (float)line[ tapx + 0 ] - 127.5f );
+      ty = weight * ( (float)line[ tapx + 1 ] - 127.5f );
+      tz = weight * ( (float)line[ tapx + 2 ] - 127.5f );
+      tw = weight * (float)line[ tapx + 3 ];
+      nx += tx;
+      ny += ty;
+      nz += tz;
+      nw += tw;
+      /* Squared planar length of this tap; contributes its tilt ratio when non-zero */
+      planar = ( tx * tx ) + ( ty * ty );
+      if( planar != 0.0f )
+        sustain += sqrtf( planar ) / sqrtf( planar + ( tz * tz ) );
+      weightsum += weight;
+      tapx += 4;
+      if( tapx >= state->width4 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = (1.0f/255.0f) / weightsum;
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+  nw *= scale;
+  nx *= state->amplifynormal;
+  ny *= state->amplifynormal;
+  scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+
+  planar = sqrtf( ( nx * nx ) + ( ny * ny ) );
+  sustain *= state->normalsustainfactor;
+  if( planar < sustain )
+  {
+    /* Averaging flattened the normal: push the planar part back out, capped at 8x */
+    boost = fminf( sustain / planar, 8.0f );
+    nx *= boost;
+    ny *= boost;
+    scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+    nx *= scale;
+    ny *= scale;
+    nz *= scale;
+  }
+
+  nx += halfrange;
+  ny += halfrange;
+  nz += halfrange;
+  /* Fourth channel back from 0..1 to 0..255 */
+  nw *= 255.0f;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ny + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nz + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nw + 0.5f ) ) );
+}
+
+
+////
+
+
+/*
+ * Alpha-weighted variant of imDynamicKernel4NormalSustain for 4-bytes-per-pixel data,
+ * written to dst[0..3].
+ *
+ * Every tap's recentred (-127.5) normal components are weighted by (tap weight * fourth
+ * byte); the fourth channel is the plain weighted average. If that averaged alpha is
+ * below state->minimumalphaf the output pixel is all zeroes. Otherwise the normal sums
+ * are divided by it, the first two components are multiplied by state->amplifynormal,
+ * the vector is rescaled to length 127.5 (length floored at 0.0625) and, when its planar
+ * length is below the accumulated per-tap tilt ratio times state->normalsustainfactor,
+ * the planar part is boosted (at most 8x) and the vector rescaled again.
+ */
+static void imDynamicKernel4NormalSustainAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
+{
+  const float halfrange = (0.5f*255.0f);
+  int col, row, tapx, tapy;
+  float weight, rowweight, weightsum, coverage, scale, boost;
+  float tx, ty, tz, nx, ny, nz, asum;
+  float planar, sustain;
+  unsigned char *line;
+
+  nx = 0.0f;
+  ny = 0.0f;
+  nz = 0.0f;
+  asum = 0.0f;
+  sustain = 0.0f;
+  weightsum = 0.0f;
+  tapy = state->matrixoffsety;
+  for( row = 0 ; row < state->matrixsizey ; row++ )
+  {
+    line = ADDRESS( state->srcdata, ( tapy * state->bytesperline ) );
+    rowweight = state->lineary[row];
+    /* Byte offset within the line: 4 bytes per pixel */
+    tapx = state->matrixoffsetx << 2;
+    for( col = 0 ; col < state->matrixsizex ; col++ )
+    {
+      weight = state->linearx[col] * rowweight;
+      /* Fourth byte scaled by the tap weight: both the alpha sample and the normal weight */
+      coverage = (float)line[ tapx + 3 ] * weight;
+      tx = coverage * ( (float)line[ tapx + 0 ] - 127.5f );
+      ty = coverage * ( (float)line[ tapx + 1 ] - 127.5f );
+      tz = coverage * ( (float)line[ tapx + 2 ] - 127.5f );
+      nx += tx;
+      ny += ty;
+      nz += tz;
+      asum += coverage;
+      /* Squared planar length of this tap; contributes its tilt ratio when non-zero */
+      planar = ( tx * tx ) + ( ty * ty );
+      if( planar != 0.0f )
+        sustain += sqrtf( planar ) / sqrtf( planar + ( tz * tz ) );
+      weightsum += weight;
+      tapx += 4;
+      if( tapx >= state->width4 )
+        tapx = 0;
+    }
+    tapy++;
+    if( tapy >= state->height )
+      tapy = 0;
+  }
+
+  scale = 1.0f / weightsum;
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+  asum *= scale;
+
+  /* Too transparent to carry a meaningful normal (also taken when the average is NaN) */
+  if( !( asum >= state->minimumalphaf ) )
+  {
+    dst[0] = 0;
+    dst[1] = 0;
+    dst[2] = 0;
+    dst[3] = 0;
+    return;
+  }
+
+  /* Undo the alpha weighting of the normal sums */
+  scale = 1.0f / asum;
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+  nx *= state->amplifynormal;
+  ny *= state->amplifynormal;
+  scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+  nx *= scale;
+  ny *= scale;
+  nz *= scale;
+
+  planar = sqrtf( ( nx * nx ) + ( ny * ny ) );
+  sustain *= state->normalsustainfactor;
+  if( planar < sustain )
+  {
+    /* Averaging flattened the normal: push the planar part back out, capped at 8x */
+    boost = fminf( sustain / planar, 8.0f );
+    nx *= boost;
+    ny *= boost;
+    scale = halfrange / fmaxf( 0.0625f, sqrtf( ( nx * nx ) + ( ny * ny ) + ( nz * nz ) ) );
+    nx *= scale;
+    ny *= scale;
+    nz *= scale;
+  }
+
+  nx += halfrange;
+  ny += halfrange;
+  nz += halfrange;
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nx + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ny + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, nz + 0.5f ) ) );
+  dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, asum + 0.5f ) ) );
+}
+
+
+////
+
+
+int imReduceImageKaiserData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int newwidth, int newheight, imReduceOptions *options )
+{
+ int filter, x, y;
+ float scalex, scaley, scaleinvx, scaleinvy;
+ float sourcex, sourcey;
+ unsigned char *dst;
+ imGenericMatrixState state;
+ void (*applykernel)( unsigned char *dst, imGenericMatrixState *state );
+#if CPU_SSE2_SUPPORT
+ void (*applykernelcore)( unsigned char *dst, imGenericMatrixState *state );
+#endif
+ /* Reduce srcdata (width x height, rows bytesperline apart) to newwidth x newheight pixels written contiguously to dstdata; returns 1 on success, 0 if the target is larger than the source or no kernel matches filter/bytesperpixel */
+ filter = options->filter;
+ if( ( newwidth > width ) || ( newheight > height ) )
+ return 0;
+
+ applykernel = 0;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = 0;
+#endif
+ /* Select the per-pixel kernel from the filter and the pixel size; unsupported combinations leave applykernel at 0 */
+ if( filter == IM_REDUCE_FILTER_LINEAR )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4Linear;
+ else if( bytesperpixel == 3 )
+ applykernel = imDynamicKernel3Linear;
+ else if( bytesperpixel == 2 )
+ applykernel = imDynamicKernel2Linear;
+ else if( bytesperpixel == 1 )
+ applykernel = imDynamicKernel1Linear;
+ }
+ else if( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ {
+ applykernel = imDynamicKernel4LinearAlphaNorm;
+#if CPU_SSE2_SUPPORT
+ applykernelcore = imDynamicKernel4LinearAlphaNorm_Core; /* Only this filter/format pair has a "core" variant */
+#endif
+ }
+ else if( bytesperpixel == 3 ) /* Fewer than 4 bytes per pixel: same kernels as the plain LINEAR filter */
+ applykernel = imDynamicKernel3Linear;
+ else if( bytesperpixel == 2 )
+ applykernel = imDynamicKernel2Linear;
+ else if( bytesperpixel == 1 )
+ applykernel = imDynamicKernel1Linear;
+ }
+ else if( filter == IM_REDUCE_FILTER_SRGB )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4sRGB;
+ else if( bytesperpixel == 3 )
+ applykernel = imDynamicKernel3sRGB;
+ else if( bytesperpixel == 2 )
+ applykernel = imDynamicKernel2sRGB;
+ else if( bytesperpixel == 1 )
+ applykernel = imDynamicKernel1sRGB;
+ }
+ else if( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4sRGBAlphaNorm;
+ else if( bytesperpixel == 3 ) /* Fewer than 4 bytes per pixel: same kernels as the plain SRGB filter */
+ applykernel = imDynamicKernel3sRGB;
+ else if( bytesperpixel == 2 )
+ applykernel = imDynamicKernel2sRGB;
+ else if( bytesperpixel == 1 )
+ applykernel = imDynamicKernel1sRGB;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP ) /* Normal-map filters only accept 3 or 4 bytes per pixel */
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4Normal;
+ else if( bytesperpixel == 3 )
+ applykernel = imDynamicKernel3Normal;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4NormalSustain;
+ else if( bytesperpixel == 3 )
+ applykernel = imDynamicKernel3NormalSustain;
+ }
+ else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM )
+ {
+ if( bytesperpixel == 4 )
+ applykernel = imDynamicKernel4NormalSustainAlphaNorm;
+ else if( bytesperpixel == 3 )
+ applykernel = imDynamicKernel3NormalSustain;
+ }
+ /* NOTE(review): IM_REDUCE_FILTER_NORMALMAP_ALPHANORM and the WATERMAP/PLANTMAP/... filters have no branch above, so they fail here — confirm intended */
+ if( !applykernel )
+ return 0;
+
+ state.minimumalpha = 4;
+ state.minimumalphaf = (float)state.minimumalpha;
+ state.amplifynormal = fmaxf( 1.0f, options->amplifynormal ); /* Values below 1.0 are raised to 1.0 */
+ state.normalsustainfactor = options->normalsustainfactor;
+
+ state.dithersum = 0.0f;
+ if( ( newwidth | newheight ) > 2 )
+ state.dithersum = 0.5f;
+
+ state.srcdata = srcdata;
+ state.width1 = width * 1; /* Row width in bytes for 1, 2, 3 and 4 bytes per pixel */
+ state.width2 = width * 2;
+ state.width3 = width * 3;
+ state.width4 = width * 4;
+ state.height = height;
+ state.bytesperline = bytesperline;
+
+ scalex = (float)newwidth / (float)width;
+ scaley = (float)newheight / (float)height;
+ scaleinvx = (float)width / (float)newwidth;
+ scaleinvy = (float)height / (float)newheight;
+ /* NOTE(review): no result of imAllocGenericState() is checked here — confirm whether it can fail */
+ imAllocGenericState( &state, scalex, scaley, options->hopcount, options->alpha );
+
+#if CPU_SSE2_SUPPORT
+ if( applykernelcore )
+ {
+ dst = dstdata;
+ for( y = 0 ; y < newheight ; y++ )
+ {
+ sourcey = ( ( (float)y + 0.5f ) * scaleinvy ) - 0.5f; /* Centre of destination row y expressed in source rows */
+ imBuildGenericLinearY( &state, scaley, scaleinvy, sourcey, options->hopcount, options->alpha, height );
+ for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
+ {
+ sourcex = ( ( (float)x + 0.5f ) * scaleinvx ) - 0.5f;
+ imBuildGenericLinearX( &state, scalex, scaleinvx, sourcex, options->hopcount, options->alpha, width );
+ if( ( state.matrixoffsetx + ( ( state.matrixsizex + 3 ) & ~3 ) ) < width ) /* Kernel width rounded up to a multiple of 4 stays inside the row; presumably the core variant cannot wrap — confirm */
+ applykernelcore( dst, &state );
+ else
+ applykernel( dst, &state );
+ }
+ }
+ }
+ else
+#endif
+ {
+ dst = dstdata;
+ for( y = 0 ; y < newheight ; y++ )
+ {
+ sourcey = ( ( (float)y + 0.5f ) * scaleinvy ) - 0.5f; /* Centre of destination row y expressed in source rows */
+ imBuildGenericLinearY( &state, scaley, scaleinvy, sourcey, options->hopcount, options->alpha, height );
+ for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
+ {
+ sourcex = ( ( (float)x + 0.5f ) * scaleinvx ) - 0.5f;
+ imBuildGenericLinearX( &state, scalex, scaleinvx, sourcex, options->hopcount, options->alpha, width );
+ applykernel( dst, &state );
+ }
+ }
+ }
+
+ imFreeGenericState( &state );
+
+ return 1;
+}
+
+
+/* Allocate imgdst->data for a newwidth x newheight image and fill it with imReduceImageKaiserData(); returns 1 on success, 0 on failure (imgdst->data is then NULL) */
+int imReduceImageKaiser( imgImage *imgdst, imgImage *imgsrc, int newwidth, int newheight, imReduceOptions *options )
+{
+  imgdst->format.width = newwidth;
+  imgdst->format.height = newheight;
+  imgdst->format.type = imgsrc->format.type;
+  imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
+  imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
+  imgdst->data = malloc( imgdst->format.height * imgdst->format.bytesperline );
+  if( !( imgdst->data ) )
+    return 0;
+  if( imReduceImageKaiserData( imgdst->data, imgsrc->data, imgsrc->format.width, imgsrc->format.height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, newwidth, newheight, options ) )
+    return 1;
+  free( imgdst->data ); /* Reduction refused (enlargement or unsupported filter/format): do not leak the buffer */
+  imgdst->data = 0;
+  return 0;
+}
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+/* Box-average the single channel of the 2x2 block at src, rounding to nearest; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox1Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  dst[0] = (unsigned char)( ( 2 + src[0] + src[bytesperpixel] + src[bytesperline] + src[bytesperline+bytesperpixel] ) / 4 );
+}
+
+/* Box-average both channels of the 2x2 block at src, rounding to nearest; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox2Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  dst[0] = (unsigned char)( ( 2 + src[0] + src[bytesperpixel] + src[bytesperline] + src[bytesperline+bytesperpixel] ) / 4 );
+  dst[1] = (unsigned char)( ( 2 + src[1] + src[bytesperpixel+1] + src[bytesperline+1] + src[bytesperline+bytesperpixel+1] ) / 4 );
+}
+
+/* Box-average the three channels of the 2x2 block at src, rounding to nearest; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox3Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int c;
+  for( c = 0 ; c < 3 ; c++ )
+    dst[c] = (unsigned char)( ( 2 + src[c] + src[bytesperpixel+c] + src[bytesperline+c] + src[bytesperline+bytesperpixel+c] ) / 4 );
+}
+
+/* Box-average the four channels of the 2x2 block at src, rounding to nearest; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox4Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int c;
+  unsigned char *below = src + bytesperline;
+  for( c = 0 ; c < 4 ; c++ )
+    dst[c] = (unsigned char)( ( 2 + src[c] + src[bytesperpixel+c] + below[c] + below[bytesperpixel+c] ) / 4 );
+}
+
+/* sRGB-aware 2x2 box filter for 1-byte pixels.
+   The four samples are converted with srgb2linear(), averaged, and the mean is
+   converted back with linear2srgb(), rounded to nearest and clamped to 0..255.
+   dithersum is unused. */
+static inline CC_ALWAYSINLINE void imReduceHalfBox1sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int row, col;
+  float lin0;
+  lin0 = 0.0f;
+  for( row = 0 ; row < 2 ; row++ )
+    for( col = 0 ; col < 2 ; col++ )
+      lin0 += srgb2linear( (float)src[ ( row * bytesperline ) + ( col * bytesperpixel ) ] );
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin0 ) + 0.5f ) ) );
+}
+
+/* sRGB-aware 2x2 box filter for 2-byte pixels: both bytes are averaged in linear light (srgb2linear/linear2srgb), rounded and clamped; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox2sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int row, col;
+  float lin0, lin1;
+  const unsigned char *px;
+  lin0 = lin1 = 0.0f;
+  for( row = 0 ; row < 2 ; row++ )
+  {
+    for( col = 0 ; col < 2 ; col++ )
+    {
+      px = src + ( row * bytesperline ) + ( col * bytesperpixel );
+      lin0 += srgb2linear( (float)px[0] );
+      lin1 += srgb2linear( (float)px[1] );
+    }
+  }
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin0 ) + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin1 ) + 0.5f ) ) );
+}
+
+/* sRGB-aware 2x2 box filter for 3-byte pixels: every channel is averaged in linear light
+   (srgb2linear/linear2srgb), rounded to nearest and clamped to 0..255; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox3sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int row, col;
+  float lin0, lin1, lin2;
+  const unsigned char *px;
+  lin0 = lin1 = lin2 = 0.0f;
+  for( row = 0 ; row < 2 ; row++ )
+  {
+    for( col = 0 ; col < 2 ; col++ )
+    {
+      px = src + ( row * bytesperline ) + ( col * bytesperpixel );
+      lin0 += srgb2linear( (float)px[0] );
+      lin1 += srgb2linear( (float)px[1] );
+      lin2 += srgb2linear( (float)px[2] );
+    }
+  }
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin0 ) + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin1 ) + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( 0.25f * lin2 ) + 0.5f ) ) );
+}
+
+/* sRGB-aware 2x2 box filter for 4-byte pixels: bytes 0..2 are averaged in linear light (srgb2linear/linear2srgb),
+   byte 3 (alpha) is a plain rounded integer average of the four alpha bytes; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox4sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int row, col, alphasum;
+  float lin0, lin1, lin2;
+  const unsigned char *px;
+  lin0 = lin1 = lin2 = 0.0f;
+  alphasum = 2; /* Rounding bias for the >> 2 below */
+  for( row = 0 ; row < 2 ; row++ )
+  {
+    for( col = 0 ; col < 2 ; col++ )
+    {
+      px = src + ( row * bytesperline ) + ( col * bytesperpixel );
+      lin0 += srgb2linear( (float)px[0] );
+      lin1 += srgb2linear( (float)px[1] );
+      lin2 += srgb2linear( (float)px[2] );
+      alphasum += (int)px[3]; /* Alpha is byte 3; reading byte 2 here would copy the third colour channel into alpha */
+    }
+  }
+  dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( lin0 * 0.25f ) + 0.5f ) ) );
+  dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( lin1 * 0.25f ) + 0.5f ) ) );
+  dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( lin2 * 0.25f ) + 0.5f ) ) );
+  dst[3] = (unsigned char)( alphasum >> 2 );
+}
+
+/* Normal-map 2x2 box filter for 3-byte pixels: average the four samples, decode to -1..1, renormalize to unit length and re-encode to bytes; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox3Normal( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  float v0, v1, v2, suminv;
+  v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
+  v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
+  v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
+  v0 = 2.0f * ( v0 - 0.5f );
+  v1 = 2.0f * ( v1 - 0.5f );
+  v2 = 2.0f * ( v2 - 0.5f );
+  /* A zero-length average would give 0.5/0 = inf and then 0*inf = NaN; keep such a vector at the centre value instead */
+  suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) + ( v2 * v2 ) );
+  suminv = ( suminv > 0.0f ) ? ( 0.5f / suminv ) : 0.0f;
+  v0 = 0.5f + ( v0 * suminv );
+  v1 = 0.5f + ( v1 * suminv );
+  v2 = 0.5f + ( v2 * suminv );
+  dst[0] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v0 );
+  dst[1] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v1 );
+  dst[2] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v2 );
+}
+
+/* Normal-map 2x2 box filter for 4-byte pixels: bytes 0..2 are averaged, decoded to -1..1, renormalized and re-encoded; byte 3 is a rounded integer average; dithersum is unused */
+static inline CC_ALWAYSINLINE void imReduceHalfBox4Normal( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  float v0, v1, v2, suminv;
+  v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
+  v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
+  v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
+  v0 = 2.0f * ( v0 - 0.5f );
+  v1 = 2.0f * ( v1 - 0.5f );
+  v2 = 2.0f * ( v2 - 0.5f );
+  /* A zero-length average would give 0.5/0 = inf and then 0*inf = NaN; keep such a vector at the centre value instead */
+  suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) + ( v2 * v2 ) );
+  suminv = ( suminv > 0.0f ) ? ( 0.5f / suminv ) : 0.0f;
+  v0 = 0.5f + ( v0 * suminv );
+  v1 = 0.5f + ( v1 * suminv );
+  v2 = 0.5f + ( v2 * suminv );
+  dst[0] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v0 );
+  dst[1] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v1 );
+  dst[2] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v2 );
+  dst[3] = (unsigned char)( ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] + 2 ) >> 2 );
+}
+
+static inline CC_ALWAYSINLINE void imReduceHalfBox3Water( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+ float v0, v1, v2, suminv;
+ /* "Water map" 2x2 reduction for 3-byte pixels: bytes 0/1 are averaged as a 2D vector, byte 2 is averaged and then dithered to 0 or 255 using the running error in *dithersum */
+ v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] ); /* Mean of the four samples scaled to 0..1 (1020 = 4*255) */
+ v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
+ v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
+
+ v0 = 2.0f * ( v0 - 0.5f ); /* Remap 0..1 to -1..1 */
+ v1 = 2.0f * ( v1 - 0.5f );
+ suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) ); /* Length of the (v0,v1) vector */
+ if( suminv < 0.75f )
+ {
+ suminv = 0.5f / suminv; /* NOTE(review): the length can be exactly 0 here, which makes the two products below NaN — confirm intended */
+ v0 = 0.5f + ( v0 * suminv ); /* Rescale the vector to length 0.5 and bias it around 0.5 */
+ v1 = 0.5f + ( v1 * suminv );
+ } /* NOTE(review): when the branch is skipped, v0/v1 stay in -1..1 without the 0.5 bias — confirm intended */
+ if( v2 > 0.1f ) /* Values of 0.1 or less are written through without dithering */
+ {
+ *dithersum += v2;
+ if( v2 > 0.45f )
+ v2 = 1.0f;
+ else if( ( v2 < 0.3f ) && ( *dithersum < 1.0f ) )
+ v2 = 0.0f;
+ else
+ v2 = ( ( v2 + *dithersum ) < 0.45f ? 0.0f : 1.0f );
+ *dithersum -= v2; /* Carry the quantization error over to the following pixels */
+ }
+ v0 *= 255.0f;
+ v1 *= 255.0f;
+ v2 *= 255.0f;
+
+ dst[0] = (int)( fmaxf( 0.0f, fminf( 255.0f, v0 + 0.5f ) ) ); /* Round to nearest and clamp to 0..255 */
+ dst[1] = (int)( fmaxf( 0.0f, fminf( 255.0f, v1 + 0.5f ) ) );
+ dst[2] = (int)( fmaxf( 0.0f, fminf( 255.0f, v2 + 0.5f ) ) );
+
+ return;
+}
+
+static inline CC_ALWAYSINLINE void imReduceHalfBox4Water( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+ float v0, v1, v2, suminv;
+ /* "Water map" 2x2 reduction for 4-byte pixels: bytes 0/1 are averaged as a 2D vector, byte 2 is averaged and then dithered to 0 or 255 using the running error in *dithersum, byte 3 is a rounded integer average */
+ v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] ); /* Mean of the four samples scaled to 0..1 (1020 = 4*255) */
+ v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
+ v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
+
+ v0 = 2.0f * ( v0 - 0.5f ); /* Remap 0..1 to -1..1 */
+ v1 = 2.0f * ( v1 - 0.5f );
+ suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) ); /* Length of the (v0,v1) vector */
+ if( suminv < 0.75f )
+ {
+ suminv = 0.5f / suminv; /* NOTE(review): the length can be exactly 0 here, which makes the two products below NaN — confirm intended */
+ v0 = 0.5f + ( v0 * suminv ); /* Rescale the vector to length 0.5 and bias it around 0.5 */
+ v1 = 0.5f + ( v1 * suminv );
+ } /* NOTE(review): when the branch is skipped, v0/v1 stay in -1..1 without the 0.5 bias — confirm intended */
+ if( v2 > 0.1f ) /* Values of 0.1 or less are written through without dithering */
+ {
+ *dithersum += v2;
+ if( v2 > 0.45f )
+ v2 = 1.0f;
+ else if( ( v2 < 0.3f ) && ( *dithersum < 1.0f ) )
+ v2 = 0.0f;
+ else
+ v2 = ( ( v2 + *dithersum ) < 0.45f ? 0.0f : 1.0f );
+ *dithersum -= v2; /* Carry the quantization error over to the following pixels */
+ }
+ v0 *= 255.0f;
+ v1 *= 255.0f;
+ v2 *= 255.0f;
+
+ dst[0] = (int)( fmaxf( 0.0f, fminf( 255.0f, v0 + 0.5f ) ) ); /* Round to nearest and clamp to 0..255 */
+ dst[1] = (int)( fmaxf( 0.0f, fminf( 255.0f, v1 + 0.5f ) ) );
+ dst[2] = (int)( fmaxf( 0.0f, fminf( 255.0f, v2 + 0.5f ) ) );
+ dst[3] = (unsigned char)( ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] + 2 ) >> 2 );
+
+ return;
+}
+
+/* "Plant map" 2x2 box filter for 4-byte pixels.
+   Bytes 0..2 are plain rounded averages of the four samples.
+   Byte 3 (alpha) is the rounded average boosted by a quarter (sum + sum/4, then /4)
+   and saturated at 255. dithersum is unused. */
+static inline CC_ALWAYSINLINE void imReduceHalfBox4Plant( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
+{
+  int c, coverage;
+
+  for( c = 0 ; c < 3 ; c++ )
+    dst[c] = (unsigned char)( ( 2 + src[c] + src[bytesperpixel+c] + src[bytesperline+c] + src[bytesperline+bytesperpixel+c] ) / 4 );
+
+  /* Sum of the four alpha bytes, 0..1020 */
+  coverage = src[3] + src[bytesperpixel+3] + src[bytesperline+3] + src[bytesperline+bytesperpixel+3];
+  coverage += coverage >> 2;
+  coverage = ( coverage + 2 ) >> 2;
+  dst[3] = (unsigned char)( ( coverage > 255 ) ? 255 : coverage );
+}
+
+
+/* Halve an image by running `work` on every 2x2 source block, writing dst contiguously. Source rows advance by two full strides (bytesperline), and at an odd right/bottom edge the last column/row is sampled twice instead of reading past it */
+static inline CC_ALWAYSINLINE void imReduceImageHalfBoxWork( unsigned char *dst, unsigned char *src, int width, int height, int bytesperpixel, int bytesperline, void (*work)( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum ) )
+{
+  int x, y, newwidth, newheight, colstep, rowstep;
+  float dithersum;
+  unsigned char *pixel;
+  newwidth = ( width < 2 ) ? 1 : ( ( width + 1 ) / 2 );
+  newheight = ( height < 2 ) ? 1 : ( ( height + 1 ) / 2 );
+  dithersum = 0.0f;
+  if( ( newwidth | newheight ) > 2 )
+    dithersum = 0.5f;
+  for( y = 0 ; y < newheight ; y++, src += ( bytesperline << 1 ) )
+  {
+    rowstep = ( ( ( y << 1 ) + 1 ) < height ) ? bytesperline : 0; /* 0: no row below, reuse the current one */
+    for( x = 0, pixel = src ; x < newwidth ; x++, pixel += ( bytesperpixel << 1 ), dst += bytesperpixel )
+    {
+      colstep = ( ( ( x << 1 ) + 1 ) < width ) ? bytesperpixel : 0; /* 0: no column to the right, reuse the current one */
+      work( dst, pixel, colstep, rowstep, &dithersum );
+    }
+  }
+}
+
+
+int imReduceImageHalfBoxData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, imReduceOptions *options )
+{
+ int filter, retval;
+ /* Halve srcdata into dstdata with a 2x2 box filter chosen from options->filter and bytesperpixel; returns 1 on success, 0 when the filter/pixel-size pair is not supported (dstdata is then left untouched) */
+ filter = options->filter;
+ retval = 1;
+ if( ( filter == IM_REDUCE_FILTER_LINEAR ) || ( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM ) ) /* ALPHANORM is processed as the plain filter here */
+ {
+ if( bytesperpixel == 4 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Linear );
+ else if( bytesperpixel == 3 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Linear );
+ else if( bytesperpixel == 2 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox2Linear );
+ else if( bytesperpixel == 1 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox1Linear );
+ else
+ retval = 0;
+ }
+ else if( ( filter == IM_REDUCE_FILTER_SRGB ) || ( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM ) ) /* ALPHANORM is processed as the plain filter here */
+ {
+ if( bytesperpixel == 4 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4sRGB );
+ else if( bytesperpixel == 3 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3sRGB );
+ else if( bytesperpixel == 2 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox2sRGB );
+ else if( bytesperpixel == 1 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox1sRGB );
+ else
+ retval = 0;
+ }
+ else if( ( filter == IM_REDUCE_FILTER_NORMALMAP ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_ALPHANORM ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM ) ) /* All four normal-map variants share the same box kernels */
+ {
+ if( bytesperpixel == 4 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Normal );
+ else if( bytesperpixel == 3 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Normal );
+ else
+ retval = 0;
+ }
+ else if( filter == IM_REDUCE_FILTER_WATERMAP )
+ {
+ if( bytesperpixel == 4 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Water );
+ else if( bytesperpixel == 3 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Water );
+ else
+ retval = 0;
+ }
+ else if( filter == IM_REDUCE_FILTER_PLANTMAP ) /* Only defined for 4-byte pixels */
+ {
+ if( bytesperpixel == 4 )
+ imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Plant );
+ else
+ retval = 0;
+ }
+ else
+ retval = 0; /* Any other filter value is rejected */
+
+ return retval;
+}
+
+
+/* Allocate imgdst->data for the half-size image (each dimension is (n+1)/2, never below 1) and fill it with
+   imReduceImageHalfBoxData(); returns 1 on success, 0 on failure (imgdst->data is then NULL) */
+int imReduceImageHalfBox( imgImage *imgdst, imgImage *imgsrc, imReduceOptions *options )
+{
+  imgdst->format.width = ( ( imgsrc->format.width < 2 ) ? 1 : ( ( imgsrc->format.width + 1 ) / 2 ) );
+  imgdst->format.height = ( ( imgsrc->format.height < 2 ) ? 1 : ( ( imgsrc->format.height + 1 ) / 2 ) );
+  imgdst->format.type = imgsrc->format.type;
+  imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
+  imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
+  imgdst->data = malloc( imgdst->format.height * imgdst->format.bytesperline );
+  if( !( imgdst->data ) )
+    return 0; /* Out of memory: never hand a NULL destination to the reducer */
+  if( imReduceImageHalfBoxData( imgdst->data, imgsrc->data, imgsrc->format.width, imgsrc->format.height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, options ) )
+    return 1;
+  free( imgdst->data ); /* Unsupported filter/format: do not leak the buffer */
+  imgdst->data = 0;
+  return 0;
+}
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+/* Build the mipmap chain of an image made of `layercount` stacked layers (0 is treated as 1).
+   cascade->mipmap[0] aliases imagedata; mipmap[1..] are malloc()ed here, tightly packed, and the list ends with a NULL entry.
+   Returns 1 on success and 0 on failure; either way imFreeMipmapCascade() releases whatever was allocated. */
+int imBuildMipmapCascade( imMipmapCascade *cascade, void *imagedata, int width, int height, int layercount, int bytesperpixel, int bytesperline, imReduceOptions *options, int cascadeflags )
+{
+  int layerindex, level, srclevel, srcwidth, srcheight, srcbytesperline, method, divisor, levelwidth, levelheight;
+  void *src, *dst;
+  cascade->width = width;
+  cascade->height = height;
+  cascade->layercount = layercount;
+  cascade->bytesperpixel = bytesperpixel;
+  cascade->bytesperline = bytesperline;
+  cascade->options = options;
+  /* Terminate the level list right away so that imFreeMipmapCascade() is safe after any return below */
+  cascade->mipmap[0] = imagedata;
+  cascade->mipmap[1] = 0;
+  /* No need for mipmaps */
+  if( ( cascade->width == 1 ) && ( cascade->height == 1 ) )
+    return 1;
+  if( bytesperpixel != 4 )
+    cascadeflags &= ~( IM_CASCADE_FLAGS_COLOR_BORDER_BASE | IM_CASCADE_FLAGS_COLOR_BORDER_MIPMAPS );
+
+  /* Allocate all the mipmap levels */
+  if( !( layercount ) )
+    layercount = 1;
+  levelwidth = cascade->width;
+  levelheight = cascade->height;
+  for( level = 1 ; ; level++ )
+  {
+    /* mipmap[level+1] must exist to hold the terminator: refuse images needing more levels than the array has */
+    if( level >= ( IM_MIPMAP_CASCADE_MAX - 1 ) )
+      return 0;
+    levelwidth = ( levelwidth < 2 ) ? 1 : ( levelwidth >> 1 );
+    levelheight = ( levelheight < 2 ) ? 1 : ( levelheight >> 1 );
+    cascade->mipmap[level+1] = 0;
+    if( !( cascade->mipmap[level] = malloc( levelwidth * levelheight * layercount * bytesperpixel ) ) )
+      return 0;
+    if( ( levelwidth == 1 ) && ( levelheight == 1 ) )
+      break;
+  }
+
+  if( cascadeflags & IM_CASCADE_FLAGS_COLOR_BORDER_BASE )
+    imPropagateAlphaBorder( imagedata, width, height * layercount, bytesperpixel, bytesperline );
+
+  /* For every layer, compute all its mipmap */
+  for( layerindex = 0 ; layerindex < layercount ; layerindex++ )
+  {
+    levelwidth = cascade->width;
+    levelheight = cascade->height;
+    for( level = 1 ; cascade->mipmap[level] ; level++ )
+    {
+      levelwidth = ( levelwidth < 2 ) ? 1 : ( levelwidth >> 1 );
+      levelheight = ( levelheight < 2 ) ? 1 : ( levelheight >> 1 );
+      dst = ADDRESS( cascade->mipmap[level], layerindex * levelwidth * levelheight * bytesperpixel );
+      /* Decide what method and source level to pick */
+      if( ( levelwidth | levelheight ) >= 16 )
+      {
+        srclevel = level - 2;
+        if( srclevel < 0 )
+          srclevel = 0;
+        method = 1;
+      }
+      else
+      {
+        srclevel = level - 1;
+        method = 0;
+      }
+#if DEBUG_VERBOSE
+      printf( "Tex level %d, srclevel %d, layer %d, filter %d, method %d : %d x %d\n", level, srclevel, layerindex, options->filter, method, levelwidth, levelheight );
+#endif
+      srcwidth = width >> srclevel;
+      if( !( srcwidth ) )
+        srcwidth = 1;
+      srcheight = height >> srclevel;
+      if( !( srcheight ) )
+        srcheight = 1;
+      /* Generated levels are tightly packed, but the caller's base image keeps its own stride */
+      srcbytesperline = ( srclevel ) ? ( srcwidth * bytesperpixel ) : cascade->bytesperline;
+      src = ADDRESS( cascade->mipmap[srclevel], layerindex * srcheight * srcbytesperline );
+      divisor = 1 << ( level - srclevel );
+      if( ( ( levelwidth * divisor ) != srcwidth ) || ( ( levelheight * divisor ) != srcheight ) )
+        method = 2;
+
+      if( method == 2 )
+      {
+        if( !( imReduceImageKaiserData( dst, src, srcwidth, srcheight, bytesperpixel, srcbytesperline, levelwidth, levelheight, options ) ) )
+        {
+          printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
+          return 0;
+        }
+      }
+      else if( method == 1 )
+      {
+        if( !( imReduceImageKaiserDataDivisor( dst, src, srcwidth, srcheight, bytesperpixel, srcbytesperline, divisor, options ) ) )
+        {
+          printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
+          return 0;
+        }
+      }
+      else
+      {
+        if( !( imReduceImageHalfBoxData( dst, src, srcwidth, srcheight, bytesperpixel, srcbytesperline, options ) ) )
+        {
+          printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
+          return 0;
+        }
+      }
+      if( cascadeflags & IM_CASCADE_FLAGS_COLOR_BORDER_MIPMAPS )
+        imPropagateAlphaBorder( dst, levelwidth, levelheight, bytesperpixel, levelwidth * bytesperpixel );
+    }
+  }
+
+  return 1;
+}
+
+
+/* Release the levels stored in cascade->mipmap[1..].
+   Walks the array up to the first NULL entry, freeing and clearing each slot;
+   mipmap[0] is never freed here. */
+void imFreeMipmapCascade( imMipmapCascade *cascade )
+{
+  void **slot;
+  for( slot = &cascade->mipmap[1] ; *slot ; slot++ )
+  {
+    free( *slot );
+    *slot = 0;
+  }
+}
+
+
+////
+
+
+#define IM_PIXEL_ALPHA_MASK (0xff000000)
+#define IM_PIXEL_RGB_MASK (0x00ffffff)
+
+void imPropagateAlphaBorder( unsigned char *imagedata, int width, int height, int bytesperpixel, int bytesperline )
+{
+ int x, y, backtrackflag;
+ uint32_t pixel, refcolor, prevrowpixel;
+ uint32_t *row, *prevrow;
+ /* Single top-to-bottom pass that copies the colour bits of pixels having alpha into adjacent pixels whose alpha bits are all zero (left, right and the row above); the alpha bits of the filled pixels stay zero. No-op unless bytesperpixel is 4 */
+ if( bytesperpixel != 4 )
+ return;
+ row = (uint32_t *)imagedata; /* NOTE(review): pixels are accessed as uint32_t, which assumes suitable alignment and that the alpha byte is the top 8 bits (byte 3 on little-endian) — confirm */
+ prevrow = row; /* For the first row, "previous row" is the row itself */
+ for( y = 0 ; y < height ; y++ )
+ {
+ refcolor = 0;
+ backtrackflag = 0;
+ for( x = 0 ; x < width ; x++ )
+ {
+ pixel = row[x];
+ prevrowpixel = prevrow[x];
+ if( pixel & IM_PIXEL_ALPHA_MASK )
+ {
+ /* Pixel has some color, spread to neighbor if applicable */
+ refcolor = pixel & IM_PIXEL_RGB_MASK;
+ if( backtrackflag ) /* The pixel to the left was transparent and found no colour: give it this one */
+ {
+ row[x-1] = refcolor;
+ backtrackflag = 0;
+ }
+ if( !( prevrowpixel & IM_PIXEL_ALPHA_MASK ) )
+ prevrow[x] = refcolor;
+ }
+ else
+ {
+ /* Pixel is fully transparent, spread from neighbor if applicable */
+ if( refcolor ) /* A reference colour of exactly 0 counts as "none" here */
+ {
+ row[x] = refcolor;
+ backtrackflag = 0;
+ refcolor = 0; /* The colour is handed to one pixel on the right only */
+ }
+ else if( prevrowpixel & IM_PIXEL_ALPHA_MASK )
+ {
+ row[x] = prevrowpixel & IM_PIXEL_RGB_MASK;
+ backtrackflag = 0;
+ }
+ else
+ backtrackflag = 1;
+ }
+ }
+ prevrow = row;
+ row = ADDRESS( row, bytesperline );
+ }
+
+ return;
+}
+
+
diff --git a/lib/graphics_utils/mipmap/imgresize.h b/lib/graphics_utils/mipmap/imgresize.h
new file mode 100644
index 000000000..b47d07a6b
--- /dev/null
+++ b/lib/graphics_utils/mipmap/imgresize.h
@@ -0,0 +1,150 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2014-2017 Alexis Naveros.
+ * Portions developed under contract to the SURVICE Engineering Company.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * -----------------------------------------------------------------------------
+ */
+
+
+#ifndef IMGRESIZE_H
+#define IMGRESIZE_H
+
+
+typedef struct
+{
+ /* Specify filter type, from the IM_REDUCE_FILTER_* list */
+ int filter;
+ /* High quality, a little slow: hopcount=3; */
+ /* Good quality, much faster: hopcount=2; */
+ int hopcount;
+ /* Strong preservation/amplification of details: alpha=2.0f; */
+ /* Mild preservation/amplification of details: alpha=6.0f; */
+ float alpha;
+ /* NORMALMAP filters: factor to amplify normals on X and Y before normalization (imReduceImageKaiserData() raises values below 1.0 to 1.0) */
+ float amplifynormal;
+ /* NORMALMAP_SUSTAIN filters: Preserve a factor of deviation "energy" as calculated by sqrtf(x*x+y*y) */
+ float normalsustainfactor;
+} imReduceOptions;
+
+/* Convenience initializer: stores each argument in the field of *options that bears the same name */
+static inline void imReduceSetOptions( imReduceOptions *options, int filter, int hopcount, float alpha, float amplifynormal, float normalsustainfactor )
+{
+  options->normalsustainfactor = normalsustainfactor;
+  options->amplifynormal = amplifynormal;
+  options->alpha = alpha;
+  options->hopcount = hopcount;
+  options->filter = filter;
+}
+
+
+/* Reduce the image's dimensions by an integer divisor ~ this is fairly fast */
+int imReduceImageKaiserDataDivisor( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int sizedivisor, imReduceOptions *options );
+/* Same as imReduceImageKaiserDataDivisor(), but imgdst is allocated */
+int imReduceImageKaiserDivisor( imgImage *imgdst, imgImage *imgsrc, int sizedivisor, imReduceOptions *options );
+
+
+/* Reduce the image's dimensions to match the newwidth and newheight ~ this is a little slower */
+int imReduceImageKaiserData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int newwidth, int newheight, imReduceOptions *options );
+/* Same as imReduceImageKaiserData(), but imgdst is allocated */
+int imReduceImageKaiser( imgImage *imgdst, imgImage *imgsrc, int newwidth, int newheight, imReduceOptions *options );
+
+
+/* Resize by half with a dumb box filter ~ don't use that except for the smallest mipmaps */
+/* Filters with ALPHANORM and/or SUSTAIN keywords are processed as the regular base filter only */
+int imReduceImageHalfBoxData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, imReduceOptions *options );
+int imReduceImageHalfBox( imgImage *imgdst, imgImage *imgsrc, imReduceOptions *options );
+
+
+/*
+Keywords for image reduction filters
+
+LINEAR: Data is linear, note that this is *not* the format of typical diffuse textures
+SRGB: Color is in sRGB space, any alpha is presumed linear
+NORMALMAP: RGB represents a XYZ vector as (2.0*RGB)-1.0f, any alpha is presumed linear
+
+ALPHANORM: Alpha normalization, the weight of pixels is proportional to their alpha values
+ (do you have "black" fully transparent pixels? please use an ALPHANORM filter)
+SUSTAIN: The "energy" of the normal map is sustained, amplified to preserve the level of details
+ Note that this filter is rather slow (set options->normalsustainfactor to 0.75 or so)
+*/
+
+enum
+{
+ /* Linear space */
+ IM_REDUCE_FILTER_LINEAR,
+ IM_REDUCE_FILTER_LINEAR_ALPHANORM,
+
+ /* sRGB space (probably what you want for diffuse textures) */
+ IM_REDUCE_FILTER_SRGB,
+ IM_REDUCE_FILTER_SRGB_ALPHANORM,
+
+ /* RGB represents a XYZ vector as (2.0*RGB)-1.0f, any alpha is presumed linear */
+ IM_REDUCE_FILTER_NORMALMAP,
+ IM_REDUCE_FILTER_NORMALMAP_ALPHANORM, /* NOTE(review): imReduceImageKaiserData() has no branch for this value and returns 0; imReduceImageHalfBoxData() treats it as NORMALMAP */
+ IM_REDUCE_FILTER_NORMALMAP_SUSTAIN,
+ IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM,
+
+ /* Custom specialized filters */
+ IM_REDUCE_FILTER_WATERMAP, /* Accepted by imReduceImageHalfBoxData() for 3- and 4-byte pixels; imReduceImageKaiserData() returns 0 */
+ IM_REDUCE_FILTER_PLANTMAP, /* Accepted by imReduceImageHalfBoxData() for 4-byte pixels only; imReduceImageKaiserData() returns 0 */
+ IM_REDUCE_FILTER_FOLLIAGE, /* NOTE(review): this and the two values below are rejected (return 0) by both imReduceImageKaiserData() and imReduceImageHalfBoxData() */
+ IM_REDUCE_FILTER_SKY,
+ IM_REDUCE_FILTER_FOG
+};
+
+
+////
+
+
+#define IM_MIPMAP_CASCADE_MAX (16)
+
+typedef struct
+{
+ int width; /* Dimensions of the base image, as passed to imBuildMipmapCascade() */
+ int height;
+ int layercount; /* Stored as passed; imBuildMipmapCascade() processes 0 as a single layer */
+ int bytesperpixel;
+ int bytesperline; /* Row stride of the base image in bytes */
+ imReduceOptions *options; /* Pointer kept as passed, not copied */
+ void *mipmap[IM_MIPMAP_CASCADE_MAX]; /* [0] is the caller's imagedata, following entries are malloc()ed levels ended by a NULL entry */
+} imMipmapCascade;
+
+
+int imBuildMipmapCascade( imMipmapCascade *cascade, void *imagedata, int width, int height, int layercount, int bytesperpixel, int bytesperline, imReduceOptions *options, int cascadeflags );
+
+void imFreeMipmapCascade( imMipmapCascade *cascade );
+
+/* For base texture, propagate RGB channels to neighbors if they are fully transparent (ignored if bytesperpixel != 4 ) */
+#define IM_CASCADE_FLAGS_COLOR_BORDER_BASE (0x1)
+/* For generated mipmaps, propagate RGB channels to neighbors if they are fully transparent (ignored if bytesperpixel != 4 ) */
+#define IM_CASCADE_FLAGS_COLOR_BORDER_MIPMAPS (0x2)
+
+
+////
+
+
+void imPropagateAlphaBorder( unsigned char *imagedata, int width, int height, int bytesperpixel, int bytesperline );
+
+
+////
+
+
+#endif
+
diff --git a/lib/irrlicht/include/IImage.h b/lib/irrlicht/include/IImage.h
index a2a93b252..7918759b5 100644
--- a/lib/irrlicht/include/IImage.h
+++ b/lib/irrlicht/include/IImage.h
@@ -98,6 +98,8 @@ public:
//! fills the surface with given color
virtual void fill(const SColor &color) =0;
+ virtual void setDeleteMemory(bool val) = 0;
+
//! get the amount of Bits per Pixel of the given color format
static u32 getBitsPerPixelFromFormat(const ECOLOR_FORMAT format)
{
diff --git a/lib/irrlicht/include/IImageLoader.h b/lib/irrlicht/include/IImageLoader.h
index 5ff091d9e..ba0da6886 100644
--- a/lib/irrlicht/include/IImageLoader.h
+++ b/lib/irrlicht/include/IImageLoader.h
@@ -42,7 +42,9 @@ public:
//! Creates a surface from the file
/** \param file File handle to check.
\return Pointer to newly created image, or 0 upon error. */
- virtual IImage* loadImage(io::IReadFile* file) const = 0;
+ virtual IImage* loadImage(io::IReadFile* file, bool skip_checking = false) const = 0;
+ virtual core::dimension2du getImageSize(io::IReadFile* file) const { return core::dimension2du(0, 0); }
+ virtual bool supportThreadedLoading() const { return false; }
};
diff --git a/lib/irrlicht/include/ITexture.h b/lib/irrlicht/include/ITexture.h
index 0586ec673..950544ce5 100644
--- a/lib/irrlicht/include/ITexture.h
+++ b/lib/irrlicht/include/ITexture.h
@@ -198,6 +198,16 @@ public:
virtual u64 getHandle() = 0;
virtual void unloadHandle() {}
+
+ virtual u32 getTextureSize() const { return 0; }
+
+ virtual void threadedReload(void* ptr, void* param) const {}
+
+ virtual void threadedSubImage(void* ptr) const {}
+
+ virtual void cleanThreadedLoader() {}
+
+ virtual int getThreadedLoadTextureCounter() const { return 0; }
protected:
//! Helper function, helps to get the desired texture creation format from the flags.
diff --git a/lib/irrlicht/include/IVideoDriver.h b/lib/irrlicht/include/IVideoDriver.h
index 3f3851425..77e854cf0 100644
--- a/lib/irrlicht/include/IVideoDriver.h
+++ b/lib/irrlicht/include/IVideoDriver.h
@@ -1186,7 +1186,7 @@ namespace video
\return The created image.
If you no longer need the image, you should call IImage::drop().
See IReferenceCounted::drop() for more information. */
- virtual IImage* createImageFromFile(io::IReadFile* file) =0;
+ virtual IImage* createImageFromFile(io::IReadFile* file, video::IImageLoader** loader = NULL) =0;
//! Writes the provided image to a file.
/** Requires that there is a suitable image writer registered
diff --git a/lib/irrlicht/include/utfwrapping.h b/lib/irrlicht/include/utfwrapping.h
index 8d67c6ea3..604b3c7f6 100644
--- a/lib/irrlicht/include/utfwrapping.h
+++ b/lib/irrlicht/include/utfwrapping.h
@@ -104,9 +104,10 @@ bool breakable (wchar_t c)
if ((c > 12287 && c < 40960) || //Common CJK words
(c > 44031 && c < 55204) || //Hangul
(c > 63743 && c < 64256) || //More Chinese
- c == 173 || c == L' ' || c == 0) //Soft hyphen and white space
- return true;
- return false;
+ c == 173 || c == L' ' || //Soft hyphen and white space
+ c == 47 || c == 92) //Slash and backslash
+ return true;
+ return false;
}
} // end namespace gui
} // end namespace irr
diff --git a/lib/irrlicht/source/Irrlicht/CImage.cpp b/lib/irrlicht/source/Irrlicht/CImage.cpp
index d5677320e..60ff3702a 100644
--- a/lib/irrlicht/source/Irrlicht/CImage.cpp
+++ b/lib/irrlicht/source/Irrlicht/CImage.cpp
@@ -457,6 +457,10 @@ inline SColor CImage::getPixelBox( s32 x, s32 y, s32 fx, s32 fy, s32 bias ) cons
return c;
}
+//! Sets the DeleteMemory flag of this image.
+/** NOTE(review): presumably the flag decides whether the image frees its
+pixel buffer when it is destroyed - confirm against ~CImage(). */
+void CImage::setDeleteMemory(bool val)
+{
+ DeleteMemory = val;
+}
} // end namespace video
} // end namespace irr
diff --git a/lib/irrlicht/source/Irrlicht/CImage.h b/lib/irrlicht/source/Irrlicht/CImage.h
index 82b34c776..8eb8459bc 100644
--- a/lib/irrlicht/source/Irrlicht/CImage.h
+++ b/lib/irrlicht/source/Irrlicht/CImage.h
@@ -103,6 +103,8 @@ public:
//! fills the surface with given color
virtual void fill(const SColor &color);
+ virtual void setDeleteMemory(bool val);
+
private:
//! assumes format and size has been set and creates the rest
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.cpp b/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.cpp
index 2ccb64cca..7bed41196 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.cpp
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.cpp
@@ -216,7 +216,7 @@ void CImageLoaderBMP::decompress4BitRLE(u8*& bmpData, s32 size, s32 width, s32 h
//! creates a surface from the file
-IImage* CImageLoaderBMP::loadImage(io::IReadFile* file) const
+IImage* CImageLoaderBMP::loadImage(io::IReadFile* file, bool skip_checking) const
{
SBMPHeader header;
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.h b/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.h
index 9321ecc23..c5708dcc5 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.h
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderBMP.h
@@ -81,7 +81,7 @@ public:
virtual bool isALoadableFileFormat(io::IReadFile* file) const;
//! creates a surface from the file
- virtual IImage* loadImage(io::IReadFile* file) const;
+ virtual IImage* loadImage(io::IReadFile* file, bool skip_checking = false) const;
private:
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.cpp b/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.cpp
index 1811f31dd..f9e77dff0 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.cpp
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.cpp
@@ -135,7 +135,7 @@ bool CImageLoaderJPG::isALoadableFileFormat(io::IReadFile* file) const
}
//! creates a surface from the file
-IImage* CImageLoaderJPG::loadImage(io::IReadFile* file) const
+IImage* CImageLoaderJPG::loadImage(io::IReadFile* file, bool skip_checking) const
{
#ifndef _IRR_COMPILE_WITH_LIBJPEG_
os::Printer::log("Can't load as not compiled with _IRR_COMPILE_WITH_LIBJPEG_:", file->getFileName(), ELL_DEBUG);
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.h b/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.h
index d3d0633f3..d15c06dbe 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.h
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderJPG.h
@@ -49,7 +49,7 @@ public:
virtual bool isALoadableFileFormat(io::IReadFile* file) const;
//! creates a surface from the file
- virtual IImage* loadImage(io::IReadFile* file) const;
+ virtual IImage* loadImage(io::IReadFile* file, bool skip_checking = false) const;
private:
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.cpp b/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.cpp
index e612330ce..b7478c541 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.cpp
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.cpp
@@ -27,14 +27,14 @@ namespace video
// PNG function for error handling
static void png_cpexcept_error(png_structp png_ptr, png_const_charp msg)
{
- os::Printer::log("PNG fatal error", msg, ELL_ERROR);
+ // Diagnostics belong on stderr: it keeps them out of regular program
+ // output on stdout.
+ fprintf(stderr, "PNG fatal error: %s\n", msg);
longjmp(png_jmpbuf(png_ptr), 1);
}
// PNG function for warning handling
static void png_cpexcept_warn(png_structp png_ptr, png_const_charp msg)
{
- os::Printer::log("PNG warning", msg, ELL_WARNING);
+ // libpng warnings are dropped here: this change removes the os::Printer::log() call that used to report them.
}
// PNG function for file reading
@@ -86,7 +86,7 @@ bool CImageLoaderPng::isALoadableFileFormat(io::IReadFile* file) const
// load in the image data
-IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
+IImage* CImageLoaderPng::loadImage(io::IReadFile* file, bool skip_checking) const
{
#ifdef _IRR_COMPILE_WITH_LIBPNG_
if (!file)
@@ -96,27 +96,17 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
//Used to point to image rows
u8** RowPointers = 0;
- png_byte buffer[8];
- // Read the first few bytes of the PNG file
- if( file->read(buffer, 8) != 8 )
- {
- os::Printer::log("LOAD PNG: can't read file\n", file->getFileName(), ELL_ERROR);
+ if (skip_checking)
+ file->seek(8);
+ else if (!isALoadableFileFormat(file))
return 0;
- }
-
- // Check if it really is a PNG file
- if( png_sig_cmp(buffer, 0, 8) )
- {
- os::Printer::log("LOAD PNG: not really a png\n", file->getFileName(), ELL_ERROR);
- return 0;
- }
// Allocate the png read struct
png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING,
NULL, (png_error_ptr)png_cpexcept_error, (png_error_ptr)png_cpexcept_warn);
if (!png_ptr)
{
- os::Printer::log("LOAD PNG: Internal PNG create read struct failure\n", file->getFileName(), ELL_ERROR);
+ //os::Printer::log("LOAD PNG: Internal PNG create read struct failure\n", file->getFileName(), ELL_ERROR);
return 0;
}
@@ -124,7 +114,7 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
png_infop info_ptr = png_create_info_struct(png_ptr);
if (!info_ptr)
{
- os::Printer::log("LOAD PNG: Internal PNG create info struct failure\n", file->getFileName(), ELL_ERROR);
+ //os::Printer::log("LOAD PNG: Internal PNG create info struct failure\n", file->getFileName(), ELL_ERROR);
png_destroy_read_struct(&png_ptr, NULL, NULL);
return 0;
}
@@ -184,20 +174,6 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
if (ColorType==PNG_COLOR_TYPE_GRAY || ColorType==PNG_COLOR_TYPE_GRAY_ALPHA)
png_set_gray_to_rgb(png_ptr);
- int intent;
- const double screen_gamma = 2.2;
-
- if (png_get_sRGB(png_ptr, info_ptr, &intent))
- png_set_gamma(png_ptr, screen_gamma, 0.45455);
- else
- {
- double image_gamma;
- if (png_get_gAMA(png_ptr, info_ptr, &image_gamma))
- png_set_gamma(png_ptr, screen_gamma, image_gamma);
- else
- png_set_gamma(png_ptr, screen_gamma, 0.45455);
- }
-
// Update the changes in between, as we need to get the new color type
// for proper processing of the RGBA type
png_read_update_info(png_ptr, info_ptr);
@@ -229,7 +205,7 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
image = new CImage(ECF_R8G8B8, core::dimension2d(Width, Height));
if (!image)
{
- os::Printer::log("LOAD PNG: Internal PNG create image struct failure\n", file->getFileName(), ELL_ERROR);
+ //os::Printer::log("LOAD PNG: Internal PNG create image struct failure\n", file->getFileName(), ELL_ERROR);
png_destroy_read_struct(&png_ptr, NULL, NULL);
return 0;
}
@@ -238,7 +214,7 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
RowPointers = new png_bytep[Height];
if (!RowPointers)
{
- os::Printer::log("LOAD PNG: Internal PNG create row pointers failure\n", file->getFileName(), ELL_ERROR);
+ //os::Printer::log("LOAD PNG: Internal PNG create row pointers failure\n", file->getFileName(), ELL_ERROR);
png_destroy_read_struct(&png_ptr, NULL, NULL);
delete image;
return 0;
@@ -276,6 +252,26 @@ IImage* CImageLoaderPng::loadImage(io::IReadFile* file) const
#endif // _IRR_COMPILE_WITH_LIBPNG_
}
+//! Reads the pixel dimensions from the PNG header without decoding the image.
+/** \param file File to inspect. Once the header bytes have been read, the
+read position is moved back to 0.
+\return Size of the image, or (0, 0) if file is NULL, is rejected by
+isALoadableFileFormat(), is too short to contain both dimensions, or PNG
+support is compiled out. */
+core::dimension2du CImageLoaderPng::getImageSize(io::IReadFile* file) const
+{
+#ifdef _IRR_COMPILE_WITH_LIBPNG_
+    if (!file || !isALoadableFileFormat(file))
+        return core::dimension2du(0, 0);
+
+    // Width (offset 16) and height (offset 20) are adjacent 32-bit
+    // big-endian integers, so one 8-byte read fetches both.
+    u8 raw[8];
+    file->seek(16);
+    const bool complete = file->read(raw, 8) == 8;
+    file->seek(0);
+    // A truncated file must not yield partially filled dimensions.
+    if (!complete)
+        return core::dimension2du(0, 0);
+
+    // Assemble the values byte by byte: this is correct on any host byte
+    // order and does not depend on __BIG_ENDIAN__ being defined.
+    const u32 width = (u32(raw[0]) << 24) | (u32(raw[1]) << 16) |
+        (u32(raw[2]) << 8) | u32(raw[3]);
+    const u32 height = (u32(raw[4]) << 24) | (u32(raw[5]) << 16) |
+        (u32(raw[6]) << 8) | u32(raw[7]);
+    return core::dimension2du(width, height);
+#else
+    return core::dimension2du(0, 0);
+#endif // _IRR_COMPILE_WITH_LIBPNG_
+}
IImageLoader* createImageLoaderPNG()
{
@@ -287,4 +283,3 @@ IImageLoader* createImageLoaderPNG()
}//end namespace video
#endif
-
diff --git a/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.h b/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.h
index a3faac2ab..789cfdd18 100644
--- a/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.h
+++ b/lib/irrlicht/source/Irrlicht/CImageLoaderPNG.h
@@ -25,15 +25,17 @@ class CImageLoaderPng : public IImageLoader
{
public:
- //! returns true if the file maybe is able to be loaded by this class
- //! based on the file extension (e.g. ".png")
- virtual bool isALoadableFileExtension(const io::path& filename) const;
+ //! returns true if the file maybe is able to be loaded by this class
+ //! based on the file extension (e.g. ".png")
+ virtual bool isALoadableFileExtension(const io::path& filename) const;
- //! returns true if the file maybe is able to be loaded by this class
- virtual bool isALoadableFileFormat(io::IReadFile* file) const;
+ //! returns true if the file maybe is able to be loaded by this class
+ virtual bool isALoadableFileFormat(io::IReadFile* file) const;
- //! creates a surface from the file
- virtual IImage* loadImage(io::IReadFile* file) const;
+ //! creates a surface from the file
+ virtual IImage* loadImage(io::IReadFile* file, bool skip_checking = false) const;
+ virtual core::dimension2du getImageSize(io::IReadFile* file) const;
+ virtual bool supportThreadedLoading() const { return true; }
};
diff --git a/lib/irrlicht/source/Irrlicht/CNullDriver.cpp b/lib/irrlicht/source/Irrlicht/CNullDriver.cpp
index 4ef9c44de..404e410c3 100644
--- a/lib/irrlicht/source/Irrlicht/CNullDriver.cpp
+++ b/lib/irrlicht/source/Irrlicht/CNullDriver.cpp
@@ -103,6 +103,7 @@ CNullDriver::CNullDriver(io::IFileSystem* io, const core::dimension2d& scre
// DriverAttributes->addInt("MaxGeometryVerticesOut", 0);
// DriverAttributes->addFloat("MaxTextureLODBias", 0.f);
DriverAttributes->addInt("Version", 1);
+ DriverAttributes->setAttribute("MAX_TEXTURE_SIZE", core::dimension2du(2048, 2048));
// DriverAttributes->addInt("ShaderLanguageVersion", 0);
// DriverAttributes->addInt("AntiAlias", 0);
@@ -1278,7 +1279,7 @@ IImage* CNullDriver::createImageFromFile(const io::path& filename)
//! Creates a software image from a file.
-IImage* CNullDriver::createImageFromFile(io::IReadFile* file)
+IImage* CNullDriver::createImageFromFile(io::IReadFile* file, video::IImageLoader** loader)
{
if (!file)
return 0;
@@ -1292,6 +1293,11 @@ IImage* CNullDriver::createImageFromFile(io::IReadFile* file)
{
if (SurfaceLoader[i]->isALoadableFileExtension(file->getFileName()))
{
+ if (loader)
+ {
+ *loader = SurfaceLoader[i];
+ return 0;
+ }
// reset file position which might have changed due to previous loadImage calls
file->seek(0);
image = SurfaceLoader[i]->loadImage(file);
@@ -1307,6 +1313,11 @@ IImage* CNullDriver::createImageFromFile(io::IReadFile* file)
file->seek(0);
if (SurfaceLoader[i]->isALoadableFileFormat(file))
{
+ if (loader)
+ {
+ *loader = SurfaceLoader[i];
+ return 0;
+ }
file->seek(0);
image = SurfaceLoader[i]->loadImage(file);
if (image)
diff --git a/lib/irrlicht/source/Irrlicht/CNullDriver.h b/lib/irrlicht/source/Irrlicht/CNullDriver.h
index 7a10cccad..3d19cfc86 100644
--- a/lib/irrlicht/source/Irrlicht/CNullDriver.h
+++ b/lib/irrlicht/source/Irrlicht/CNullDriver.h
@@ -348,7 +348,7 @@ namespace video
virtual IImage* createImageFromFile(const io::path& filename);
//! Creates a software image from a file.
- virtual IImage* createImageFromFile(io::IReadFile* file);
+ virtual IImage* createImageFromFile(io::IReadFile* file, video::IImageLoader** loader = NULL);
//! Creates a software image from a byte array.
/** \param useForeignMemory: If true, the image will use the data pointer
diff --git a/sources.cmake b/sources.cmake
index ddc029d4f..ba4868d71 100644
--- a/sources.cmake
+++ b/sources.cmake
@@ -1,5 +1,5 @@
-# Modify this file to change the last-modified date when you add/remove a file.
-# This will then trigger a new cmake run automatically.
+# Modify this file to change the last-modified date when you add/remove a file.
+# This will then trigger a new cmake run automatically.
file(GLOB_RECURSE STK_HEADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "src/*.hpp")
file(GLOB_RECURSE STK_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "src/*.cpp")
file(GLOB_RECURSE STK_SHADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "data/shaders/*")
diff --git a/src/config/hardware_stats.cpp b/src/config/hardware_stats.cpp
index f657c6409..d3f532f86 100644
--- a/src/config/hardware_stats.cpp
+++ b/src/config/hardware_stats.cpp
@@ -102,7 +102,7 @@ int getRAM()
*/
int getNumProcessors()
{
-#ifdef __linux__
+#if defined(__linux__) || defined(__CYGWIN__)
return sysconf(_SC_NPROCESSORS_CONF);
#endif
#ifdef WIN32
diff --git a/src/config/hardware_stats.hpp b/src/config/hardware_stats.hpp
index d2dfd0044..7558c040c 100644
--- a/src/config/hardware_stats.hpp
+++ b/src/config/hardware_stats.hpp
@@ -82,6 +82,7 @@ namespace HardwareStats
// ========================================================================
void reportHardwareStats();
const std::string& getOSVersion();
+ int getNumProcessors();
}; // HardwareStats
#endif
diff --git a/src/config/user_config.hpp b/src/config/user_config.hpp
index 6bdfa1ae8..0ecc320e3 100644
--- a/src/config/user_config.hpp
+++ b/src/config/user_config.hpp
@@ -553,6 +553,33 @@ namespace UserConfigParams
&m_video_group, "Max texture size when high definition textures are "
"disabled"));
+ PARAM_PREFIX BoolUserConfigParam m_hq_mipmap
+ PARAM_DEFAULT(BoolUserConfigParam(false, "hq_mipmap",
+ &m_video_group, "Generate mipmap for textures using "
+ "high quality method with SSE"));
+
+ // ---- Recording
+ PARAM_PREFIX GroupUserConfigParam m_recording_group
+ PARAM_DEFAULT( GroupUserConfigParam("Recording", "Recording Settings") );
+
+ PARAM_PREFIX BoolUserConfigParam m_record_bmp
+ PARAM_DEFAULT(BoolUserConfigParam(false, "record_bmp",
+ &m_recording_group, "Record video using uncompressed bitmap, notice: this "
+ "will require a lot of space and fast disk access."));
+
+ PARAM_PREFIX BoolUserConfigParam m_limit_game_fps
+ PARAM_DEFAULT(BoolUserConfigParam(true, "limit_game_fps",
+ &m_recording_group, "Limit game framerate not beyond the fps of recording "
+ "video."));
+
+ PARAM_PREFIX IntUserConfigParam m_record_compression
+ PARAM_DEFAULT(IntUserConfigParam(90, "record_compression",
+ &m_recording_group, "Specify the compression level of recording video"));
+
+ PARAM_PREFIX IntUserConfigParam m_record_fps
+ PARAM_DEFAULT(IntUserConfigParam(30, "record_fps",
+ &m_recording_group, "Specify the fps of recording video"));
+
// ---- Debug - not saved to config file
/** If gamepad debugging is enabled. */
PARAM_PREFIX bool m_unit_testing PARAM_DEFAULT(false);
@@ -617,9 +644,6 @@ namespace UserConfigParams
/** True if graphical profiler should be displayed */
PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false );
- /** True if hardware skinning should be enabled */
- PARAM_PREFIX bool m_hw_skinning_enabled PARAM_DEFAULT( false );
-
// not saved to file
// ---- Networking
diff --git a/src/graphics/camera_normal.cpp b/src/graphics/camera_normal.cpp
index 0c0e1aa65..2e4c12bc2 100644
--- a/src/graphics/camera_normal.cpp
+++ b/src/graphics/camera_normal.cpp
@@ -127,8 +127,13 @@ void CameraNormal::smoothMoveCamera(float dt)
delta2 = 1;
btTransform btt = m_kart->getTrans();
- m_kart_position = btt.getOrigin();// m_kart_position + (btt.getOrigin() - m_kart_position) * delta2;
- m_kart_rotation = m_kart_rotation.normalized().slerp(btt.getRotation().normalized(), delta2);
+ m_kart_position = btt.getOrigin();
+ btQuaternion q1, q2;
+ q1 = m_kart_rotation.normalized();
+ q2 = btt.getRotation().normalized();
+ if (dot(q1, q2) < 0.0f)
+ q2 = -q2;
+ m_kart_rotation = q1.slerp(q2, delta2);
btt.setOrigin(m_kart_position);
btt.setRotation(m_kart_rotation);
diff --git a/src/graphics/central_settings.cpp b/src/graphics/central_settings.cpp
index d3ce43759..18b07ca2d 100644
--- a/src/graphics/central_settings.cpp
+++ b/src/graphics/central_settings.cpp
@@ -52,6 +52,7 @@ void CentralVideoSettings::init()
hasGS = false;
hasTextureFilterAnisotropic = false;
hasTextureSwizzle = false;
+ hasPixelBufferObject = false;
#if defined(USE_GLES2)
hasBGRA = false;
@@ -196,6 +197,11 @@ void CentralVideoSettings::init()
hasTextureSwizzle = true;
Log::info("GLDriver", "ARB Texture Swizzle Present");
}
+ if (hasGLExtension("GL_ARB_pixel_buffer_object"))
+ {
+ hasPixelBufferObject = true;
+ Log::info("GLDriver", "ARB Pixel Buffer Object Present");
+ }
// Only unset the high def textures if they are set as default. If the
// user has enabled them (bit 1 set), then leave them enabled.
if (GraphicsRestrictions::isDisabled(GraphicsRestrictions::GR_HIGHDEFINITION_TEXTURES) &&
@@ -238,7 +244,7 @@ void CentralVideoSettings::init()
hasTextureStorage = true;
hasTextureSwizzle = true;
}
-
+
if (!GraphicsRestrictions::isDisabled(GraphicsRestrictions::GR_EXPLICIT_ATTRIB_LOCATION) &&
m_glsl == true)
{
@@ -476,4 +482,14 @@ bool CentralVideoSettings::isARBTextureSwizzleUsable() const
return m_glsl && hasTextureSwizzle;
}
+/** Returns the flag that init() sets when it finds the
+ *  GL_ARB_pixel_buffer_object extension.
+ */
+bool CentralVideoSettings::isARBPixelBufferObjectUsable() const
+{
+ return hasPixelBufferObject;
+}
+
+/** Threaded texture loading is reported as supported only when pixel buffer
+ *  objects, buffer storage and texture storage are all usable.
+ */
+bool CentralVideoSettings::supportsThreadedTextureLoading() const
+{
+ return isARBPixelBufferObjectUsable() && isARBBufferStorageUsable() && isARBTextureStorageUsable();
+}
+
#endif // !SERVER_ONLY
diff --git a/src/graphics/central_settings.hpp b/src/graphics/central_settings.hpp
index 146160e0b..c2f0c4937 100644
--- a/src/graphics/central_settings.hpp
+++ b/src/graphics/central_settings.hpp
@@ -44,6 +44,7 @@ private:
bool hasMultiDrawIndirect;
bool hasTextureFilterAnisotropic;
bool hasTextureSwizzle;
+ bool hasPixelBufferObject;
#if defined(USE_GLES2)
bool hasBGRA;
@@ -84,6 +85,7 @@ public:
bool isARBExplicitAttribLocationUsable() const;
bool isEXTTextureFilterAnisotropicUsable() const;
bool isARBTextureSwizzleUsable() const;
+ bool isARBPixelBufferObjectUsable() const;
#if defined(USE_GLES2)
bool isEXTTextureFormatBGRA8888Usable() const;
@@ -98,6 +100,7 @@ public:
bool supportsComputeShadersFiltering() const;
bool supportsAsyncInstanceUpload() const;
bool supportsHardwareSkinning() const;
+ bool supportsThreadedTextureLoading() const;
// "Macro" around feature support and user config
bool isShadowEnabled() const;
diff --git a/src/graphics/hq_mipmap_generator.cpp b/src/graphics/hq_mipmap_generator.cpp
new file mode 100644
index 000000000..d0531d9b4
--- /dev/null
+++ b/src/graphics/hq_mipmap_generator.cpp
@@ -0,0 +1,120 @@
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2017 SuperTuxKart-Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "graphics/hq_mipmap_generator.hpp"
+#include "graphics/stk_tex_manager.hpp"
+#undef DUMP_MIPMAP
+#ifdef DUMP_MIPMAP
+#include "graphics/irr_driver.hpp"
+#include "utils/string_utils.hpp"
+#endif
+#include
+
+extern "C"
+{
+ #include
+ #include
+}
+
+// ----------------------------------------------------------------------------
+/** Precomputes the dimensions and byte offset of every mipmap level below the
+ *  base image and allocates the cascade state that threadedReload() fills.
+ *  \param name Texture name, forwarded to video::ITexture.
+ *  \param data Pixels of the base level, 4 bytes per pixel. They are released
+ *         with delete[] in cleanThreadedLoader().
+ *  \param size Dimensions of the base level.
+ *  \param texture_name OpenGL texture that receives the generated levels.
+ *  \param tc Texture configuration, must not be NULL.
+ */
+HQMipmapGenerator::HQMipmapGenerator(const io::path& name, uint8_t* data,
+                                     const core::dimension2d<u32>& size,
+                                     GLuint texture_name, TexConfig* tc)
+                 : video::ITexture(name), m_orig_data(data), m_size(size),
+                   m_texture_name(texture_name), m_texture_size(0),
+                   m_mipmap_data(NULL), m_tex_config(tc)
+{
+    assert(m_tex_config != NULL);
+    unsigned width = m_size.Width;
+    unsigned height = m_size.Height;
+    while (true)
+    {
+        // Halve each dimension, clamping at 1, until the 1x1 level
+        width = width < 2 ? 1 : width >> 1;
+        height = height < 2 ? 1 : height >> 1;
+        // Store the level's dimensions and its byte offset in the buffer
+        m_mipmap_sizes.emplace_back(core::dimension2du(width, height),
+            m_texture_size);
+        m_texture_size += width * height * 4;
+        if (width == 1 && height == 1)
+            break;
+    }
+    // Total size: offset of the last (1x1) level plus its 4 bytes
+    m_texture_size = unsigned(m_mipmap_sizes.back().second) + 4;
+    // Zero-fill the cascade so its level pointers are null instead of
+    // indeterminate if cleanThreadedLoader() runs before threadedReload()
+    m_mipmap_data = calloc(1, sizeof(imMipmapCascade));
+    assert(m_mipmap_data != NULL);
+}   // HQMipmapGenerator
+
+// ----------------------------------------------------------------------------
+/** Builds all reduced mipmap levels from the base image and copies them into
+ *  the given buffer at the offsets computed by the constructor.
+ *  \param ptr Destination buffer, must hold at least getTextureSize() bytes.
+ *  \param param Unused.
+ */
+void HQMipmapGenerator::threadedReload(void* ptr, void* param) const
+{
+    imReduceOptions options;
+    imReduceSetOptions(&options,
+        m_tex_config->m_normal_map ?
+        IM_REDUCE_FILTER_NORMALMAP: m_tex_config->m_srgb ?
+        IM_REDUCE_FILTER_SRGB : IM_REDUCE_FILTER_LINEAR/*filter*/,
+        2/*hopcount*/, 2.0f/*alpha*/, 1.0f/*amplifynormal*/,
+        0.0f/*normalsustainfactor*/);
+    imMipmapCascade* mm_cascade = (imMipmapCascade*)m_mipmap_data;
+    const int ret = imBuildMipmapCascade(mm_cascade, m_orig_data,
+        m_size.Width, m_size.Height, 1/*layercount*/, 4, m_size.Width * 4,
+        &options, 0);
+    if (ret != 1)
+    {
+        // After a failed build the level pointers cannot be trusted, so do
+        // not copy from them; the destination buffer is left unwritten.
+        assert(false && "imBuildMipmapCascade failed");
+        return;
+    }
+    for (unsigned int i = 0; i < m_mipmap_sizes.size(); i++)
+    {
+        const unsigned size = m_mipmap_sizes[i].first.getArea() * 4;
+        // Entry i of m_mipmap_sizes is read from cascade level i + 1
+        memcpy((uint8_t*)ptr + m_mipmap_sizes[i].second,
+            mm_cascade->mipmap[i + 1], size);
+#ifdef DUMP_MIPMAP
+        video::IImage* image = irr_driver->getVideoDriver()
+            ->createImageFromData(video::ECF_A8R8G8B8, m_mipmap_sizes[i].first,
+            mm_cascade->mipmap[i + 1], false/*ownForeignMemory*/);
+        irr_driver->getVideoDriver()->writeImageToFile(image, std::string
+            (StringUtils::toString(i) + "_" +
+            StringUtils::getBasename(NamedPath.getPtr())).c_str());
+        image->drop();
+#endif
+    }
+}   // threadedReload
+
+// ----------------------------------------------------------------------------
+/** Uploads every generated level from the given buffer into the OpenGL
+ *  texture, starting at mipmap level 1, and then deletes this object.
+ *  \param ptr Buffer previously filled by threadedReload().
+ */
+void HQMipmapGenerator::threadedSubImage(void* ptr) const
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+    uint8_t* const pixels = (uint8_t*)ptr;
+    glBindTexture(GL_TEXTURE_2D, m_texture_name);
+
+    // Level 0 is the base image, so the generated levels start at 1
+    unsigned int level = 1;
+    for (const auto& mipmap : m_mipmap_sizes)
+    {
+        glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0,
+            mipmap.first.Width, mipmap.first.Height,
+            GL_BGRA, GL_UNSIGNED_BYTE, pixels + mipmap.second);
+        level++;
+    }
+
+    // The generator is single use: it destroys itself after the upload
+    delete this;
+#endif
+}   // threadedSubImage
+
+// ----------------------------------------------------------------------------
+/** Releases the base image pixels and the mipmap cascade. The destructor
+ *  declared in the header is empty, so within this class the memory is only
+ *  freed here.
+ */
+void HQMipmapGenerator::cleanThreadedLoader()
+{
+ delete[] m_orig_data;
+ // Release what the cascade holds first, then the cascade struct that the constructor allocated
+ imFreeMipmapCascade((imMipmapCascade*)m_mipmap_data);
+ free(m_mipmap_data);
+} // cleanThreadedLoader
+
+#endif
diff --git a/src/graphics/hq_mipmap_generator.hpp b/src/graphics/hq_mipmap_generator.hpp
new file mode 100644
index 000000000..189827637
--- /dev/null
+++ b/src/graphics/hq_mipmap_generator.hpp
@@ -0,0 +1,103 @@
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2017 SuperTuxKart-Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_HQ_MIPMAP_GENERATOR_HPP
+#define HEADER_HQ_MIPMAP_GENERATOR_HPP
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "graphics/gl_headers.hpp"
+#include "utils/no_copy.hpp"
+#include "utils/types.hpp"
+
+#include
+#include
+
+using namespace irr;
+struct TexConfig;
+
+/** Single-use helper that generates the mipmap levels of an existing OpenGL
+ *  texture. It derives from video::ITexture so that it can be driven through
+ *  the threadedReload() / threadedSubImage() / cleanThreadedLoader() hooks;
+ *  the other ITexture overrides are inert stubs.
+ */
+class HQMipmapGenerator : public video::ITexture, NoCopy
+{
+private:
+    /** Pixels of the base level, released in cleanThreadedLoader(). */
+    uint8_t* m_orig_data;
+
+    /** Dimensions of the base level. */
+    core::dimension2d<u32> m_size;
+
+    /** OpenGL texture that receives the generated levels. */
+    GLuint m_texture_name;
+
+    /** Total number of bytes of all generated levels. */
+    unsigned int m_texture_size;
+
+    /** Points to an imMipmapCascade; kept as void* so that this header does
+     *  not need the C mipmap headers. */
+    void* m_mipmap_data;
+
+    TexConfig* m_tex_config;
+
+    /** For each generated level: its dimensions and its byte offset inside
+     *  the buffer handed to threadedReload() / threadedSubImage(). */
+    std::vector<std::pair<core::dimension2d<u32>, size_t> > m_mipmap_sizes;
+
+public:
+    // ------------------------------------------------------------------------
+    HQMipmapGenerator(const io::path& name, uint8_t* data,
+                      const core::dimension2d<u32>& size, GLuint texture_name,
+                      TexConfig* tc);
+    // ------------------------------------------------------------------------
+    /** Frees nothing: the owned buffers are released by
+     *  cleanThreadedLoader(). */
+    virtual ~HQMipmapGenerator() {}
+    // ------------------------------------------------------------------------
+    virtual void* lock(video::E_TEXTURE_LOCK_MODE mode =
+                       video::ETLM_READ_WRITE, u32 mipmap_level = 0)
+                                                               { return NULL; }
+    // ------------------------------------------------------------------------
+    virtual void unlock() {}
+    // ------------------------------------------------------------------------
+    virtual const core::dimension2d<u32>& getOriginalSize() const
+                                                             { return m_size; }
+    // ------------------------------------------------------------------------
+    virtual const core::dimension2d<u32>& getSize() const    { return m_size; }
+    // ------------------------------------------------------------------------
+    /** Always desktop OpenGL: this header is compiled only when neither
+     *  SERVER_ONLY nor USE_GLES2 is defined, so no GLES2 case can occur. */
+    virtual video::E_DRIVER_TYPE getDriverType() const
+                                                  { return video::EDT_OPENGL; }
+    // ------------------------------------------------------------------------
+    virtual video::ECOLOR_FORMAT getColorFormat() const
+                                                { return video::ECF_A8R8G8B8; }
+    // ------------------------------------------------------------------------
+    virtual u32 getPitch() const                                  { return 0; }
+    // ------------------------------------------------------------------------
+    virtual bool hasMipMaps() const                           { return false; }
+    // ------------------------------------------------------------------------
+    virtual void regenerateMipMapLevels(void* mipmap_data = NULL) {}
+    // ------------------------------------------------------------------------
+    virtual u32 getOpenGLTextureName() const         { return m_texture_name; }
+    // ------------------------------------------------------------------------
+    virtual u64 getHandle()                                       { return 0; }
+    // ------------------------------------------------------------------------
+    /** Returns the number of bytes threadedReload() writes. */
+    virtual unsigned int getTextureSize() const      { return m_texture_size; }
+    // ------------------------------------------------------------------------
+    virtual void threadedReload(void* ptr, void* param) const;
+    // ------------------------------------------------------------------------
+    virtual void threadedSubImage(void* ptr) const;
+    // ------------------------------------------------------------------------
+    virtual void cleanThreadedLoader();
+
+};   // HQMipmapGenerator
+
+#endif
+
+#endif
diff --git a/src/graphics/irr_driver.cpp b/src/graphics/irr_driver.cpp
index 3c0e78783..22c1ab322 100644
--- a/src/graphics/irr_driver.cpp
+++ b/src/graphics/irr_driver.cpp
@@ -61,6 +61,7 @@
#include "states_screens/dialogs/confirm_resolution_dialog.hpp"
#include "states_screens/state_manager.hpp"
#include "tracks/track_manager.hpp"
+#include "utils/avi_writer.hpp"
#include "utils/constants.hpp"
#include "utils/log.hpp"
#include "utils/profiler.hpp"
@@ -146,6 +147,7 @@ IrrDriver::IrrDriver()
m_last_light_bucket_distance = 0;
m_clear_color = video::SColor(255, 100, 101, 140);
m_skinning_joint = 0;
+ m_recording = false;
} // IrrDriver
@@ -167,6 +169,9 @@ IrrDriver::~IrrDriver()
#endif
delete m_wind;
delete m_renderer;
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ AVIWriter::kill();
+#endif
} // ~IrrDriver
// ----------------------------------------------------------------------------
@@ -716,13 +721,11 @@ void IrrDriver::initDevice()
// ----------------------------------------------------------------------------
void IrrDriver::setMaxTextureSize()
{
- if( (UserConfigParams::m_high_definition_textures & 0x01) == 0)
- {
- io::IAttributes &att = m_video_driver->getNonConstDriverAttributes();
- att.setAttribute("MAX_TEXTURE_SIZE", core::dimension2du(
- UserConfigParams::m_max_texture_size,
- UserConfigParams::m_max_texture_size));
- }
+ // When bit 0 of m_high_definition_textures is clear, the user's
+ // m_max_texture_size applies. Otherwise 2048 is used, the same value that
+ // CNullDriver's constructor registers for MAX_TEXTURE_SIZE. The attribute
+ // is now written in both cases.
+ const unsigned max =
+ (UserConfigParams::m_high_definition_textures & 0x01) == 0 ?
+ UserConfigParams::m_max_texture_size : 2048;
+ io::IAttributes &att = m_video_driver->getNonConstDriverAttributes();
+ att.setAttribute("MAX_TEXTURE_SIZE", core::dimension2du(max, max));
} // setMaxTextureSize
// ----------------------------------------------------------------------------
@@ -923,6 +926,9 @@ void IrrDriver::applyResolutionSettings()
// (we're sure to update main.cpp at some point and forget this one...)
VAOManager::getInstance()->kill();
STKTexManager::getInstance()->kill();
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ AVIWriter::kill();
+#endif
// initDevice will drop the current device.
if (CVS->isGLSL())
{
@@ -1842,6 +1848,9 @@ void IrrDriver::update(float dt)
PropertyAnimator::get()->update(dt);
+ STKTexManager::getInstance()
+ ->checkThreadedLoadTextures(true/*util_queue_empty*/);
+
World *world = World::getWorld();
if (world)
@@ -1884,8 +1893,41 @@ void IrrDriver::update(float dt)
// menu.
//if(World::getWorld() && World::getWorld()->isRacePhase())
// printRenderStats();
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ if (m_recording)
+ AVIWriter::getInstance()->captureFrameBufferImage(dt);
+#endif
} // update
+// ----------------------------------------------------------------------------
+/** Starts or stops capturing rendered frames with the AVIWriter.
+ *  The body is compiled out when SERVER_ONLY or USE_GLES2 is defined.
+ *  \param val True to start recording, false to stop it.
+ */
+void IrrDriver::setRecording(bool val)
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+    // Nothing to do if the requested state is already active. This is
+    // checked first so that a redundant request (e.g. stopping while not
+    // recording) does not log the PBO warning below.
+    if (m_recording == val)
+        return;
+    if (!CVS->isARBPixelBufferObjectUsable())
+    {
+        Log::warn("irr_driver", "PBO extension missing, can't record video.");
+        return;
+    }
+    m_recording = val;
+    if (!m_recording)
+    {
+        AVIWriter::getInstance()->stopRecording();
+        return;
+    }
+
+    // The target is the screenshot directory plus the current track name,
+    // or "menu" when no world exists.
+    std::string track_name = World::getWorld() != NULL ?
+        race_manager->getTrackName() : "menu";
+    AVIWriter::setRecordingTarget(file_manager->getScreenshotDir() +
+        track_name);
+    AVIWriter::getInstance()->resetFrameBufferImage();
+    MessageQueue::add(MessageQueue::MT_GENERIC,
+        _("Video recording started."));
+#endif
+} // setRecording
+
// ----------------------------------------------------------------------------
void IrrDriver::requestScreenshot()
@@ -2095,4 +2137,3 @@ GLuint IrrDriver::getDepthStencilTexture()
return m_renderer->getDepthStencilTexture();
} // getDepthStencilTexture
-
diff --git a/src/graphics/irr_driver.hpp b/src/graphics/irr_driver.hpp
index f3f6ba15d..21d6a611c 100644
--- a/src/graphics/irr_driver.hpp
+++ b/src/graphics/irr_driver.hpp
@@ -163,6 +163,7 @@ private:
bool m_lightviz;
bool m_distortviz;
bool m_boundingboxesviz;
+ bool m_recording;
/** Background colour to reset a buffer. Can be changed by each track. */
irr::video::SColor m_clear_color;
@@ -414,6 +415,10 @@ public:
// ------------------------------------------------------------------------
bool getBoundingBoxesViz() { return m_boundingboxesviz; }
// ------------------------------------------------------------------------
+ bool isRecording() const { return m_recording; }
+ // ------------------------------------------------------------------------
+ void setRecording(bool val);
+ // ------------------------------------------------------------------------
u32 getRenderPass() { return m_renderpass; }
// ------------------------------------------------------------------------
std::vector getLights() { return m_lights; }
diff --git a/src/graphics/material.cpp b/src/graphics/material.cpp
index ce40bad83..fb82e1aa8 100644
--- a/src/graphics/material.cpp
+++ b/src/graphics/material.cpp
@@ -526,9 +526,9 @@ void Material::install(bool srgb, bool premul_alpha)
}
else
{
- m_texture = STKTexManager::getInstance()->getTexture
- (m_original_full_path, srgb, premul_alpha, false/*set_material*/,
- srgb/*mesh_tex*/);
+ TexConfig tc(srgb, premul_alpha, srgb/*mesh_tex*/);
+ m_texture = STKTexManager::getInstance()
+ ->getTexture(m_original_full_path, &tc);
}
if (m_texture == NULL) return;
@@ -771,9 +771,8 @@ void Material::setMaterialProperties(video::SMaterial *m, scene::IMeshBuffer* m
STKTexManager* stm = STKTexManager::getInstance();
if (m_gloss_map.size() > 0 && CVS->isDefferedEnabled())
{
- glossytex = stm->getTexture(m_gloss_map, false/*srgb*/,
- false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ TexConfig gtc(false/*srgb*/, false/*premul_alpha*/);
+ glossytex = stm->getTexture(m_gloss_map, &gtc);
}
else
{
@@ -787,9 +786,11 @@ void Material::setMaterialProperties(video::SMaterial *m, scene::IMeshBuffer* m
stm->STKTexManager::getInstance()->getUnicolorTexture(SColor(0, 0, 0, 0));
if (m_colorization_mask.size() > 0)
{
+ TexConfig cmtc(false/*srgb*/, false/*premul_alpha*/,
+ true/*mesh_tex*/, false/*set_material*/,
+ true/*color_mask*/);
colorization_mask_tex = stm->getTexture(m_colorization_mask,
- false/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/, false/*no_upload*/, true/*single_channel*/);
+ &cmtc);
}
m->setTexture(2, colorization_mask_tex);
}
@@ -845,32 +846,29 @@ void Material::setMaterialProperties(video::SMaterial *m, scene::IMeshBuffer* m
m->setTexture(1, glossytex);
return;
case SHADERTYPE_SPLATTING:
- tex = stm->getTexture(m_splatting_texture_1,
- true/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ {
+ TexConfig stc(true/*srgb*/, false/*premul_alpha*/,
+ true/*mesh_tex*/, false/*set_material*/);
+ tex = stm->getTexture(m_splatting_texture_1, &stc);
m->setTexture(3, tex);
if (m_splatting_texture_2.size() > 0)
{
- tex = stm->getTexture(m_splatting_texture_2,
- true/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ tex = stm->getTexture(m_splatting_texture_2, &stc);
}
m->setTexture(4, tex);
if (m_splatting_texture_3.size() > 0)
{
- tex = stm->getTexture(m_splatting_texture_3,
- true/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ tex = stm->getTexture(m_splatting_texture_3, &stc);
}
m->setTexture(5, tex);
if (m_splatting_texture_4.size() > 0)
{
- tex = stm->getTexture(m_splatting_texture_4,
- false/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ TexConfig s4tc(false/*srgb*/, false/*premul_alpha*/,
+ true/*mesh_tex*/, false/*set_material*/);
+ tex = stm->getTexture(m_splatting_texture_4, &s4tc);
}
m->setTexture(6, tex);
m->setTexture(7, glossytex);
@@ -878,6 +876,7 @@ void Material::setMaterialProperties(video::SMaterial *m, scene::IMeshBuffer* m
// Material and shaders
m->MaterialType = Shaders::getShader(ES_SPLATTING);
return;
+ }
case SHADERTYPE_WATER:
m->setTexture(1, irr_driver->getTexture(FileManager::TEXTURE,
"waternormals.jpg"));
@@ -912,9 +911,10 @@ void Material::setMaterialProperties(video::SMaterial *m, scene::IMeshBuffer* m
{
if (CVS->isDefferedEnabled())
{
- tex = stm->getTexture(m_normal_map_tex, false/*srgb*/,
- false/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ TexConfig nmtc(false/*srgb*/, false/*premul_alpha*/,
+ true/*mesh_tex*/, false/*set_material*/,
+ false/*color_mask*/, true/*normal_map*/);
+ tex = stm->getTexture(m_normal_map_tex, &nmtc);
}
else
tex = stm->STKTexManager::getInstance()->getUnicolorTexture(SColor(0, 0, 0, 0));
diff --git a/src/graphics/rtts.cpp b/src/graphics/rtts.cpp
index 76cfc85e4..ea2b35d12 100644
--- a/src/graphics/rtts.cpp
+++ b/src/graphics/rtts.cpp
@@ -49,10 +49,10 @@ static GLuint generateRTT(const core::dimension2du &res, GLint internalFormat, G
return result;
}
-RTT::RTT(size_t width, size_t height)
+RTT::RTT(size_t width, size_t height, float rtt_scale)
{
- m_width = width;
- m_height = height;
+ m_width = width * rtt_scale;
+ m_height = height * rtt_scale;
m_shadow_FBO = NULL;
m_RH_FBO = NULL;
m_RSM = NULL;
@@ -60,14 +60,13 @@ RTT::RTT(size_t width, size_t height)
using namespace video;
using namespace core;
- dimension2du res(int(width * UserConfigParams::m_scale_rtts_factor),
- int(height * UserConfigParams::m_scale_rtts_factor) );
+ dimension2du res(m_width, m_height);
const dimension2du half = res/2;
const dimension2du quarter = res/4;
const dimension2du eighth = res/8;
- const u16 shadowside = u16(1024 * UserConfigParams::m_scale_rtts_factor);
+ const u16 shadowside = u16(1024 * rtt_scale);
const dimension2du shadowsize0(shadowside, shadowside);
const dimension2du shadowsize1(shadowside / 2, shadowside / 2);
const dimension2du shadowsize2(shadowside / 4, shadowside / 4);
@@ -294,7 +293,12 @@ RTT::RTT(size_t width, size_t height)
glClear(GL_COLOR_BUFFER_BIT);
getFBO(FBO_COMBINED_DIFFUSE_SPECULAR).bind();
- glClearColor(.5, .5, .5, .5);
+ float color = 0.5;
+#if defined(USE_GLES2)
+ if (!CVS->isDefferedEnabled())
+ color = pow(color, 1. / 2.2);
+#endif
+ glClearColor(color, color, color, color);
glClear(GL_COLOR_BUFFER_BIT);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
#if !defined(USE_GLES2)
diff --git a/src/graphics/rtts.hpp b/src/graphics/rtts.hpp
index f5102ff00..4e38322c8 100644
--- a/src/graphics/rtts.hpp
+++ b/src/graphics/rtts.hpp
@@ -144,7 +144,7 @@ enum TypeRTT : unsigned int
class RTT
{
public:
- RTT(size_t width, size_t height);
+ RTT(size_t width, size_t height, float rtt_scale = 1.0f);
~RTT();
size_t getWidth () const { return m_width ; }
diff --git a/src/graphics/shader_based_renderer.cpp b/src/graphics/shader_based_renderer.cpp
index c1bcba02a..1d253cb58 100644
--- a/src/graphics/shader_based_renderer.cpp
+++ b/src/graphics/shader_based_renderer.cpp
@@ -199,10 +199,8 @@ void ShaderBasedRenderer::uploadLightingData() const
void ShaderBasedRenderer::computeMatrixesAndCameras(scene::ICameraSceneNode *const camnode,
size_t width, size_t height)
{
- float w = width * UserConfigParams::m_scale_rtts_factor;
- float h = height * UserConfigParams::m_scale_rtts_factor;
- m_current_screen_size = core::vector2df(w, h);
- m_shadow_matrices.computeMatrixesAndCameras(camnode, int(w), int(h),
+ m_current_screen_size = core::vector2df((float)width, (float)height);
+ m_shadow_matrices.computeMatrixesAndCameras(camnode, width, height,
m_rtts->getDepthStencilTexture());
} // computeMatrixesAndCameras
@@ -669,7 +667,8 @@ void ShaderBasedRenderer::onLoadWorld()
const core::recti &viewport = Camera::getCamera(0)->getViewport();
size_t width = viewport.LowerRightCorner.X - viewport.UpperLeftCorner.X;
size_t height = viewport.LowerRightCorner.Y - viewport.UpperLeftCorner.Y;
- RTT* rtts = new RTT(width, height);
+ RTT* rtts = new RTT(width, height, CVS->isDefferedEnabled() ?
+ UserConfigParams::m_scale_rtts_factor : 1.0f);
setRTT(rtts);
}
@@ -787,9 +786,7 @@ void ShaderBasedRenderer::render(float dt)
RaceGUIBase *rg = world->getRaceGUI();
if (rg) rg->update(dt);
- bool force_rtt = UserConfigParams::m_scale_rtts_factor != 1.0f;
-
- if (!CVS->isDefferedEnabled() && !force_rtt)
+ if (!CVS->isDefferedEnabled())
{
prepareForwardRenderer();
}
@@ -803,12 +800,10 @@ void ShaderBasedRenderer::render(float dt)
oss << "drawAll() for kart " << cam;
PROFILER_PUSH_CPU_MARKER(oss.str().c_str(), (cam+1)*60,
0x00, 0x00);
- camera->activate(!CVS->isDefferedEnabled() && !force_rtt);
+ camera->activate(!CVS->isDefferedEnabled());
rg->preRenderCallback(camera); // adjusts start referee
irr_driver->getSceneManager()->setActiveCamera(camnode);
- const core::recti &viewport = camera->getViewport();
-
if (!CVS->isDefferedEnabled())
glEnable(GL_FRAMEBUFFER_SRGB);
@@ -816,12 +811,12 @@ void ShaderBasedRenderer::render(float dt)
m_lighting_passes.updateLightsInfo(camnode, dt);
PROFILER_POP_CPU_MARKER();
PROFILER_PUSH_CPU_MARKER("UBO upload", 0x0, 0xFF, 0x0);
- computeMatrixesAndCameras(camnode, viewport.LowerRightCorner.X - viewport.UpperLeftCorner.X, viewport.LowerRightCorner.Y - viewport.UpperLeftCorner.Y);
+ computeMatrixesAndCameras(camnode, m_rtts->getWidth(), m_rtts->getHeight());
m_shadow_matrices.updateSunOrthoMatrices();
if(CVS->isARBUniformBufferObjectUsable())
uploadLightingData();
PROFILER_POP_CPU_MARKER();
- renderScene(camnode, dt, track->hasShadows(), force_rtt);
+ renderScene(camnode, dt, track->hasShadows(), false);
if (irr_driver->getBoundingBoxesViz())
{
@@ -830,7 +825,7 @@ void ShaderBasedRenderer::render(float dt)
debugPhysics();
- if (CVS->isDefferedEnabled() || force_rtt)
+ if (CVS->isDefferedEnabled())
{
renderPostProcessing(camera);
}
diff --git a/src/graphics/shader_files_manager.cpp b/src/graphics/shader_files_manager.cpp
index e7b37de05..6682af1b6 100644
--- a/src/graphics/shader_files_manager.cpp
+++ b/src/graphics/shader_files_manager.cpp
@@ -51,6 +51,62 @@ const std::string& ShaderFilesManager::getHeader()
return shader_header;
} // getHeader
+// ----------------------------------------------------------------------------
+/** Maximum nesting depth of #stk_include directives. Guards against a shader
+ *  file that includes itself, directly or through other files. */
+static const unsigned MAX_STK_INCLUDE_DEPTH = 16;
+
+// ----------------------------------------------------------------------------
+/** Appends the content of a shader file to \p code, replacing every line that
+ *  contains an #stk_include directive with the content of the quoted file.
+ *  \param file Shader file name, looked up with file_manager->getShader().
+ *  \param code Stream that receives the expanded source.
+ *  \param depth Include nesting level of \p file (0 for the top-level file).
+ */
+static void readShaderFile(const std::string& file, std::ostringstream& code,
+                           unsigned depth)
+{
+    // Without this limit a circular include recurses until the stack
+    // overflows.
+    if (depth > MAX_STK_INCLUDE_DEPTH)
+    {
+        Log::error("ShaderFilesManager", "Too many nested #stk_include"
+                   " levels while reading '%s'.", file.c_str());
+        return;
+    }
+
+    std::ifstream stream(file_manager->getShader(file), std::ios::in);
+
+    if (!stream.is_open())
+    {
+        Log::error("ShaderFilesManager", "Can not open '%s'.", file.c_str());
+        return;
+    }
+
+    const std::string stk_include = "#stk_include";
+    std::string line;
+
+    while (std::getline(stream, line))
+    {
+        // Ordinary line: copy it through unchanged.
+        if (line.find(stk_include) == std::string::npos)
+        {
+            code << "\n" << line;
+            continue;
+        }
+
+        // The included file name is the text between the first two
+        // double quotes on the line.
+        const std::size_t open_quote = line.find("\"");
+        if (open_quote == std::string::npos)
+        {
+            Log::error("ShaderFilesManager", "Invalid #stk_include"
+                       " line: '%s'.", line.c_str());
+            continue;
+        }
+
+        std::string filename = line.substr(open_quote + 1);
+
+        const std::size_t close_quote = filename.find("\"");
+        if (close_quote == std::string::npos)
+        {
+            Log::error("ShaderFilesManager", "Invalid #stk_include"
+                       " line: '%s'.", line.c_str());
+            continue;
+        }
+
+        filename = filename.substr(0, close_quote);
+
+        // Read the whole include file, which may itself use #stk_include.
+        readShaderFile(filename, code, depth + 1);
+    }
+    // The stream is closed by its destructor.
+}   // readShaderFile
+
+// ----------------------------------------------------------------------------
+/** Reads a shader file into \p code, expanding #stk_include directives.
+ *  Errors (unreadable file, malformed directive, too deep nesting) are logged
+ *  and the offending file or line is skipped.
+ *  \param file Name of the shader file to read.
+ *  \param code Stream that receives the source, one "\n"-prefixed line each.
+ */
+void ShaderFilesManager::readFile(const std::string& file,
+                                  std::ostringstream& code)
+{
+    readShaderFile(file, code, 0);
+}   // readFile
+
+
// ----------------------------------------------------------------------------
/** Loads a single shader. This is NOT cached, use addShaderFile for that.
* \param file Filename of the shader to load.
@@ -90,7 +146,9 @@ GLuint ShaderFilesManager::loadShader(const std::string &file, unsigned type)
if (CVS->isARBExplicitAttribLocationUsable())
{
+#if !defined(USE_GLES2)
code << "#extension GL_ARB_explicit_attrib_location : enable\n";
+#endif
code << "#define Explicit_Attrib_Location_Usable\n";
}
@@ -106,12 +164,14 @@ GLuint ShaderFilesManager::loadShader(const std::string &file, unsigned type)
code << "#define VSLayer\n";
if (CVS->needsRGBBindlessWorkaround())
code << "#define SRGBBindlessFix\n";
+ if (CVS->isDefferedEnabled())
+ code << "#define Advanced_Lighting_Enabled\n";
#if !defined(USE_GLES2)
// shader compilation fails with some drivers if there is no precision
// qualifier
if (type == GL_FRAGMENT_SHADER)
- code << "precision mediump float;\n";
+ code << "precision highp float;\n";
#else
int range[2], precision;
glGetShaderPrecisionFormat(GL_FRAGMENT_SHADER, GL_HIGH_FLOAT, range,
@@ -126,69 +186,7 @@ GLuint ShaderFilesManager::loadShader(const std::string &file, unsigned type)
code << getHeader();
- std::ifstream stream(file_manager->getShader(file), std::ios::in);
- if (stream.is_open())
- {
- const std::string stk_include = "#stk_include";
- std::string line;
-
- while (std::getline(stream, line))
- {
- const std::size_t pos = line.find(stk_include);
-
- // load the custom file pointed by the #stk_include directive
- if (pos != std::string::npos)
- {
- // find the start "
- std::size_t pos = line.find("\"");
- if (pos == std::string::npos)
- {
- Log::error("ShaderFilesManager", "Invalid #stk_include"
- " line: '%s'.", line.c_str());
- continue;
- }
-
- std::string filename = line.substr(pos + 1);
-
- // find the end "
- pos = filename.find("\"");
- if (pos == std::string::npos)
- {
- Log::error("ShaderFilesManager", "Invalid #stk_include"
- " line: '%s'.", line.c_str());
- continue;
- }
-
- filename = filename.substr(0, pos);
-
- // read the whole include file
- std::ifstream include_stream(file_manager->getShader(filename), std::ios::in);
- if (!include_stream.is_open())
- {
- Log::error("ShaderFilesManager", "Couldn't open included"
- " shader: '%s'.", filename.c_str());
- continue;
- }
-
- std::string include_line = "";
- while (std::getline(include_stream, include_line))
- {
- code << "\n" << include_line;
- }
- include_stream.close();
- }
- else
- {
- code << "\n" << line;
- }
- }
-
- stream.close();
- }
- else
- {
- Log::error("ShaderFilesManager", "Can not open '%s'.", file.c_str());
- }
+ readFile(file, code);
Log::info("ShaderFilesManager", "Compiling shader : %s", file.c_str());
const std::string &source = code.str();
diff --git a/src/graphics/shader_files_manager.hpp b/src/graphics/shader_files_manager.hpp
index aa11d90a0..f0ad1fbd9 100644
--- a/src/graphics/shader_files_manager.hpp
+++ b/src/graphics/shader_files_manager.hpp
@@ -38,6 +38,7 @@ private:
// ------------------------------------------------------------------------
const std::string& getHeader();
+ void readFile(const std::string& file, std::ostringstream& code);
public:
// ------------------------------------------------------------------------
diff --git a/src/graphics/skybox.cpp b/src/graphics/skybox.cpp
index 9c7a27a48..f8d503cd6 100644
--- a/src/graphics/skybox.cpp
+++ b/src/graphics/skybox.cpp
@@ -163,19 +163,6 @@ void Skybox::generateCubeMapFromTextures()
assert(img != NULL);
img->copyToScaling(rgba[i], size, size);
-#if defined(USE_GLES2)
- if (CVS->isEXTTextureFormatBGRA8888Usable())
- {
- // BGRA image returned by getTextureImage causes black sky in gles
- for (unsigned int j = 0; j < size * size; j++)
- {
- char tmp_val = rgba[i][j * 4];
- rgba[i][j * 4] = rgba[i][j * 4 + 2];
- rgba[i][j * 4 + 2] = tmp_val;
- }
- }
-#endif
-
if (i == 2 || i == 3)
{
char *tmp = new char[size * size * 4];
@@ -196,7 +183,8 @@ void Skybox::generateCubeMapFromTextures()
GL_COMPRESSED_SRGB_ALPHA : GL_SRGB_ALPHA;
GLint format = GL_BGRA;
#else
- GLint internal_format = GL_RGBA8;
+ GLint internal_format = CVS->isDefferedEnabled() ? GL_SRGB8_ALPHA8
+ : GL_RGBA8;
GLint format = GL_RGBA;
#endif
diff --git a/src/graphics/spherical_harmonics.cpp b/src/graphics/spherical_harmonics.cpp
index 3a4a4c9f7..c94149bbf 100644
--- a/src/graphics/spherical_harmonics.cpp
+++ b/src/graphics/spherical_harmonics.cpp
@@ -565,15 +565,12 @@ void SphericalHarmonics::setTextures(const std::vector &spher
assert(img != NULL);
img->copyToScaling(sh_rgba[i], sh_w, sh_h);
#if defined(USE_GLES2)
- if (!CVS->isEXTTextureFormatBGRA8888Usable())
+ // Code here assumes color format is BGRA
+ for (unsigned int j = 0; j < sh_w * sh_h; j++)
{
- // Code here assume color format is BGRA
- for (unsigned int j = 0; j < sh_w * sh_h; j++)
- {
- char tmp_val = sh_rgba[i][j * 4];
- sh_rgba[i][j * 4] = sh_rgba[i][j * 4 + 2];
- sh_rgba[i][j * 4 + 2] = tmp_val;
- }
+ char tmp_val = sh_rgba[i][j * 4];
+ sh_rgba[i][j * 4] = sh_rgba[i][j * 4 + 2];
+ sh_rgba[i][j * 4 + 2] = tmp_val;
}
#endif
} //for (unsigned i = 0; i < 6; i++)
diff --git a/src/graphics/stars.cpp b/src/graphics/stars.cpp
index d1d54a1d2..13db505ae 100644
--- a/src/graphics/stars.cpp
+++ b/src/graphics/stars.cpp
@@ -40,9 +40,9 @@ Stars::Stars(AbstractKart *kart)
m_parent_kart_node = kart->getNode();
m_enabled = false;
+ TexConfig stc(true/*srgb*/, true/*premul_alpha*/);
video::ITexture* texture = STKTexManager::getInstance()->getTexture
- ("starparticle.png", true/*srgb*/, true/*premul_alpha*/,
- false/*set_material*/, true/*mesh_tex*/);
+ ("starparticle.png", &stc);
Material* star_material =
material_manager->getMaterial("starparticle.png");
diff --git a/src/graphics/stk_mesh_loader.cpp b/src/graphics/stk_mesh_loader.cpp
index b2918b61a..a11dca346 100644
--- a/src/graphics/stk_mesh_loader.cpp
+++ b/src/graphics/stk_mesh_loader.cpp
@@ -1044,10 +1044,11 @@ void STKMeshLoader::loadTextures(SB3dMaterial& material) const
else
full_path = fs->getFileBasename(B3dTexture->TextureName);
+ TexConfig mtc(i <= 1 ? true : false/*srgb*/, false/*premul_alpha*/,
+ true/*mesh_tex*/, true/*set_material*/);
video::ITexture* tex =
STKTexManager::getInstance()->getTexture(full_path.c_str(),
- i <= 1 ? true : false/*is_srgb*/, false/*premul_alpha*/,
- true/*set_material*/);
+ &mtc);
material.Material.setTexture(i, tex);
if (material.Textures[i]->Flags & 0x10) // Clamp U
diff --git a/src/graphics/stk_tex_manager.cpp b/src/graphics/stk_tex_manager.cpp
index aa39e60e8..4ea5ccd94 100644
--- a/src/graphics/stk_tex_manager.cpp
+++ b/src/graphics/stk_tex_manager.cpp
@@ -16,19 +16,107 @@
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "graphics/stk_tex_manager.hpp"
+#include "config/hardware_stats.hpp"
+#include "config/user_config.hpp"
#include "graphics/central_settings.hpp"
#include "graphics/materials.hpp"
+#include "graphics/threaded_tex_loader.hpp"
#include "graphics/stk_texture.hpp"
#include "io/file_manager.hpp"
#include "utils/string_utils.hpp"
#include "utils/log.hpp"
+#include
+
+// ----------------------------------------------------------------------------
+// Starts with no PBO, a thread count of 0 and a pending counter of 0, then
+// calls createThreadedTexLoaders() to set up threaded loading if available.
+STKTexManager::STKTexManager() : m_pbo(0), m_thread_size(0),
+ m_threaded_load_textures_counter(0)
+{
+ createThreadedTexLoaders();
+} // STKTexManager
+
// ----------------------------------------------------------------------------
STKTexManager::~STKTexManager()
{
removeTexture(NULL/*texture*/, true/*remove_all*/);
+ destroyThreadedTexLoaders();
} // ~STKTexManager
+// ----------------------------------------------------------------------------
+/** Creates the pixel buffer object and the ThreadedTexLoader objects that
+ *  write into it. Does nothing unless CVS reports threaded texture loading
+ *  as supported; compiled out in SERVER_ONLY and USE_GLES2 builds.
+ */
+void STKTexManager::createThreadedTexLoaders()
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+    if (!CVS->supportsThreadedTextureLoading())
+        return;
+
+    pthread_mutex_init(&m_threaded_load_textures_mutex, NULL);
+    pthread_cond_init(&m_cond_request, NULL);
+
+    // One loader per reported processor, at least one, and no more than 3
+    // unless high quality mipmaps are enabled.
+    m_thread_size = HardwareStats::getNumProcessors();
+    if (m_thread_size == 0)
+        m_thread_size = 1;
+    m_thread_size = core::clamp(m_thread_size, 1,
+        UserConfigParams::m_hq_mipmap ? m_thread_size : 3);
+
+    // Every loader gets max_tex_size x max_tex_size x 4 bytes of the buffer.
+    const unsigned max_tex_size =
+        (UserConfigParams::m_high_definition_textures & 0x01) == 0 ?
+        UserConfigParams::m_max_texture_size : 2048;
+    const unsigned each_capacity = max_tex_size * max_tex_size * 4;
+    // Computed in GLsizeiptr, the type glBufferStorage takes, so that the
+    // product does not wrap in 32-bit unsigned arithmetic on 64-bit builds
+    // when many threads are used.
+    const GLsizeiptr pbo_size = (GLsizeiptr)each_capacity * m_thread_size;
+    Log::info("STKTexManager", "%d thread(s) for texture loading,"
+        " each capacity %u MB.", m_thread_size,
+        each_capacity / 1024 / 1024);
+    if (UserConfigParams::m_hq_mipmap)
+        Log::info("STKTexManager", "High quality mipmap enabled.");
+
+    // A single buffer, mapped once for writing with the persistent and
+    // coherent flags, is shared by all loaders.
+    glGenBuffers(1, &m_pbo);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo);
+    glBufferStorage(GL_PIXEL_UNPACK_BUFFER, pbo_size, NULL,
+        GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT |
+        GL_MAP_COHERENT_BIT);
+    uint8_t* pbo_ptr = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER,
+        0, pbo_size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT |
+        GL_MAP_COHERENT_BIT);
+
+    // Hand each loader its own slice of the mapping: the byte offset into
+    // the buffer and the matching pointer.
+    size_t offset = 0;
+    for (int i = 0; i < m_thread_size; i++)
+    {
+        m_all_tex_loaders.push_back(new ThreadedTexLoader(each_capacity,
+            offset, pbo_ptr + offset, &m_threaded_load_textures_mutex,
+            &m_cond_request, this));
+        offset += each_capacity;
+    }
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+#endif
+} // createThreadedTexLoaders
+
+// ----------------------------------------------------------------------------
+/** Stops and deletes all ThreadedTexLoader objects, then releases the pixel
+ *  buffer object, the mutex and the condition variable, and resets the
+ *  related members. Does nothing unless CVS reports threaded texture loading
+ *  as supported; compiled out in SERVER_ONLY and USE_GLES2 builds.
+ */
+void STKTexManager::destroyThreadedTexLoaders()
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+    if (!CVS->supportsThreadedTextureLoading())
+        return;
+
+    // A dummy texture is queued once per loader.
+    // NOTE(review): presumably a sentinel telling each loader to exit;
+    // confirm in ThreadedTexLoader.
+    STKTexture* delete_ttl = new STKTexture((uint8_t*)NULL, "delete_ttl",
+        0, false, true);
+    for (int i = 0; i < m_thread_size; i++)
+        addThreadedLoadTexture(delete_ttl);
+
+    for (int i = 0; i < m_thread_size; i++)
+    {
+        // A loader that does not report ready in time is deleted anyway.
+        // NOTE(review): 2.0f is presumably a timeout in seconds; confirm.
+        if (!m_all_tex_loaders[i]->waitForReadyToDeleted(2.0f))
+        {
+            Log::info("STKTexManager", "ThreadedTexLoader %d not stopping,"
+                " exiting anyway.", i);
+        }
+        delete m_all_tex_loaders[i];
+    }
+    delete delete_ttl;
+
+    glDeleteBuffers(1, &m_pbo);
+    pthread_mutex_destroy(&m_threaded_load_textures_mutex);
+    pthread_cond_destroy(&m_cond_request);
+    m_pbo = 0;
+    m_thread_size = 0;
+    m_threaded_load_textures_counter = 0;
+    m_all_tex_loaders.clear();
+#endif
+} // destroyThreadedTexLoaders
+
// ----------------------------------------------------------------------------
STKTexture* STKTexManager::findTextureInFileSystem(const std::string& filename,
std::string* full_path)
@@ -55,10 +143,8 @@ STKTexture* STKTexManager::findTextureInFileSystem(const std::string& filename,
} // findTextureInFileSystem
// ----------------------------------------------------------------------------
-video::ITexture* STKTexManager::getTexture(const std::string& path, bool srgb,
- bool premul_alpha,
- bool set_material, bool mesh_tex,
- bool no_upload, bool single_channel,
+video::ITexture* STKTexManager::getTexture(const std::string& path,
+ TexConfig* tc, bool no_upload,
bool create_if_unfound)
{
auto ret = m_all_textures.find(path);
@@ -79,8 +165,7 @@ video::ITexture* STKTexManager::getTexture(const std::string& path, bool srgb,
if (create_if_unfound)
{
new_texture = new STKTexture(full_path.empty() ? path : full_path,
- srgb, premul_alpha, set_material, mesh_tex, no_upload,
- single_channel);
+ tc, no_upload);
if (new_texture->getOpenGLTextureName() == 0 && !no_upload)
{
const char* name = new_texture->getName().getPtr();
@@ -95,6 +180,11 @@ video::ITexture* STKTexManager::getTexture(const std::string& path, bool srgb,
delete new_texture;
return NULL;
}
+ if (new_texture->useThreadedLoading())
+ {
+ addThreadedLoadTexture(new_texture);
+ checkThreadedLoadTextures(false/*util_queue_empty*/);
+ }
}
if (create_if_unfound && !no_upload)
@@ -204,6 +294,10 @@ core::stringw STKTexManager::reloadTexture(const irr::core::stringw& name)
if (p.second == NULL || !p.second->isMeshTexture())
continue;
p.second->reload();
+ if (p.second->useThreadedLoading())
+ {
+ addThreadedLoadTexture(p.second);
+ }
Log::info("STKTexManager", "%s reloaded",
p.second->getName().getPtr());
}
@@ -226,6 +320,10 @@ core::stringw STKTexManager::reloadTexture(const irr::core::stringw& name)
if (fname == tex_name || fname == tex_path)
{
p.second->reload();
+ if (p.second->useThreadedLoading())
+ {
+ addThreadedLoadTexture(p.second);
+ }
result += tex_name.c_str();
result += L" ";
break;
@@ -271,3 +369,78 @@ void STKTexManager::setTextureErrorMessage(const std::string &error,
else
m_texture_error_message = StringUtils::insertValues(error, detail);
} // setTextureErrorMessage
+
+// ----------------------------------------------------------------------------
+// Lets every loader that reports finishedLoading() handle its completed
+// textures with the shared PBO bound, then waits on a GL fence before
+// releasing the loaders.
+// \param util_queue_empty If true, first repeat upload passes until the
+//        pending counter has dropped to zero.
+// Does nothing when threaded texture loading is not supported; compiled out
+// in SERVER_ONLY and USE_GLES2 builds.
+void STKTexManager::checkThreadedLoadTextures(bool util_queue_empty)
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ if (!CVS->supportsThreadedTextureLoading()) return;
+ bool uploaded = false;
+ bool empty_queue = false;
+ if (util_queue_empty)
+ {
+ while (true)
+ {
+ // The pending counter is only read while holding the mutex.
+ pthread_mutex_lock(&m_threaded_load_textures_mutex);
+ empty_queue = m_threaded_load_textures_counter == 0;
+ pthread_mutex_unlock(&m_threaded_load_textures_mutex);
+ if (empty_queue)
+ {
+ // NOTE(review): presumably flags a loader's last, not yet
+ // reported batch as finished so the pass below handles it;
+ // confirm in ThreadedTexLoader.
+ for (ThreadedTexLoader* ttl : m_all_tex_loaders)
+ {
+ if (ttl->lastQueueReady())
+ {
+ ttl->lock();
+ ttl->setFinishLoading();
+ uploaded = true;
+ ttl->unlock(false/*finish_it*/);
+ }
+ }
+ break;
+ }
+ else
+ {
+ // Work is still pending: run one upload pass, then poll again.
+ checkThreadedLoadTextures(false/*util_queue_empty*/);
+ }
+ }
+ }
+ // Counter is zero and no loader was flagged above: nothing to upload.
+ if (empty_queue && !uploaded)
+ return;
+ uploaded = false;
+ for (ThreadedTexLoader* ttl : m_all_tex_loaders)
+ {
+ // A loader that has finished stays locked until after the fence
+ // below; the others are unlocked again right away.
+ ttl->lock();
+ if (ttl->finishedLoading())
+ {
+ // Bind the PBO once, before the first loader is handled.
+ if (!uploaded)
+ {
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo);
+ uploaded = true;
+ }
+ ttl->handleCompletedTextures();
+ }
+ else
+ {
+ ttl->unlock(false/*finish_it*/);
+ }
+ }
+ if (uploaded)
+ {
+ // Block until the GL commands issued so far have completed: poll
+ // once without waiting, then retry with a 1000000 ns timeout for as
+ // long as the wait times out.
+ GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ GLenum reason = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, 0);
+ if (reason != GL_ALREADY_SIGNALED)
+ {
+ do
+ {
+ reason = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT,
+ 1000000);
+ }
+ while (reason == GL_TIMEOUT_EXPIRED);
+ }
+ glDeleteSync(sync);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ // NOTE(review): unlock(true) is called on every loader, including
+ // those already released with unlock(false) in the loop above;
+ // confirm ThreadedTexLoader::unlock(true) tolerates a loader that
+ // is not currently locked.
+ for (ThreadedTexLoader* ttl : m_all_tex_loaders)
+ ttl->unlock(true/*finish_it*/);
+ }
+#endif
+} // checkThreadedLoadTextures
diff --git a/src/graphics/stk_tex_manager.hpp b/src/graphics/stk_tex_manager.hpp
index 47b151154..80163f491 100644
--- a/src/graphics/stk_tex_manager.hpp
+++ b/src/graphics/stk_tex_manager.hpp
@@ -23,17 +23,43 @@
#include "utils/singleton.hpp"
#include "irrString.h"
+#include "ITexture.h"
+#include
-#include
+#include
#include
+#include
#include
+#include
class STKTexture;
+class ThreadedTexLoader;
namespace irr
{
- namespace video { class ITexture; class SColor; }
+ namespace video { class SColor; }
}
+/** Bundle of flags passed to STKTexManager::getTexture() and STKTexture to
+ *  describe how a texture is to be loaded. All members are plain bools.
+ */
+struct TexConfig
+{
+    /** Load the texture as sRGB. */
+    bool m_srgb;
+    /** Premultiplied alpha flag. */
+    bool m_premul_alpha;
+    /** Texture is used by a mesh (the default). */
+    bool m_mesh_tex;
+    /** Have the texture look up its material. */
+    bool m_set_material;
+    /** Texture is a colorization mask. */
+    bool m_colorization_mask;
+    /** Texture is a normal map. */
+    bool m_normal_map;
+    // ------------------------------------------------------------------------
+    /** Stores the given flags; with no arguments only m_mesh_tex is true. */
+    TexConfig(bool srgb = false, bool premul_alpha = false,
+              bool mesh_tex = true, bool set_material = false,
+              bool color_mask = false, bool normal_map = false)
+        : m_srgb(srgb), m_premul_alpha(premul_alpha), m_mesh_tex(mesh_tex),
+          m_set_material(set_material), m_colorization_mask(color_mask),
+          m_normal_map(normal_map)
+    {
+    }
+};
+
class STKTexManager : public Singleton<STKTexManager>, NoCopy
{
private:
@@ -43,22 +69,43 @@ private:
* This is used to specify details like: "while loading kart '...'" */
std::string m_texture_error_message;
+ std::vector<ThreadedTexLoader*> m_all_tex_loaders;
+
+ GLuint m_pbo;
+
+ int m_thread_size;
+
+ class SmallestTexture
+ {
+ public:
+ inline bool operator()(const irr::video::ITexture* a,
+ const irr::video::ITexture* b) const
+ {
+ return a->getTextureSize() > b->getTextureSize();
+ }
+ };
+ std::priority_queue<irr::video::ITexture*,
+ std::vector<irr::video::ITexture*>, SmallestTexture>
+ m_threaded_load_textures;
+
+ int m_threaded_load_textures_counter;
+
+ pthread_mutex_t m_threaded_load_textures_mutex;
+
+ pthread_cond_t m_cond_request;
+
// ------------------------------------------------------------------------
STKTexture* findTextureInFileSystem(const std::string& filename,
std::string* full_path);
public:
// ------------------------------------------------------------------------
- STKTexManager() {}
+ STKTexManager();
// ------------------------------------------------------------------------
~STKTexManager();
// ------------------------------------------------------------------------
irr::video::ITexture* getTexture(const std::string& path,
- bool srgb = false,
- bool premul_alpha = false,
- bool set_material = false,
- bool mesh_tex = false,
+ TexConfig* tc = NULL,
bool no_upload = false,
- bool single_channel = false,
bool create_if_unfound = true);
// ------------------------------------------------------------------------
irr::video::ITexture* getUnicolorTexture(const irr::video::SColor &c);
@@ -127,6 +174,35 @@ public:
return getTexture(filename, std::string(error_message),
std::string(detail));
} // getTexture
+ // ------------------------------------------------------------------------
+ void checkThreadedLoadTextures(bool util_queue_empty);
+ // ------------------------------------------------------------------------
+ irr::video::ITexture* getThreadedLoadTexture()
+ { return m_threaded_load_textures.top(); }
+ // ------------------------------------------------------------------------
+ // Adds val to the pending counter; despite the "set" in the name the
+ // counter is not overwritten. Asserts that the result is not negative.
+ // Takes no lock itself: addThreadedLoadTexture() calls it while holding
+ // m_threaded_load_textures_mutex.
+ void setThreadedLoadTextureCounter(int val)
+ {
+ m_threaded_load_textures_counter += val;
+ assert(m_threaded_load_textures_counter >= 0);
+ }
+ // ------------------------------------------------------------------------
+ // Queues t for threaded loading. While holding the mutex it pushes t onto
+ // the priority queue, adds t->getThreadedLoadTextureCounter() to the
+ // pending counter and signals m_cond_request.
+ void addThreadedLoadTexture(irr::video::ITexture* t)
+ {
+ pthread_mutex_lock(&m_threaded_load_textures_mutex);
+ m_threaded_load_textures.push(t);
+ setThreadedLoadTextureCounter(t->getThreadedLoadTextureCounter());
+ pthread_cond_signal(&m_cond_request);
+ pthread_mutex_unlock(&m_threaded_load_textures_mutex);
+ }
+ // ------------------------------------------------------------------------
+ void removeThreadedLoadTexture() { m_threaded_load_textures.pop(); }
+ // ------------------------------------------------------------------------
+ bool isThreadedLoadTexturesEmpty()
+ { return m_threaded_load_textures.empty(); }
+ // ------------------------------------------------------------------------
+ void createThreadedTexLoaders();
+ // ------------------------------------------------------------------------
+ void destroyThreadedTexLoaders();
}; // STKTexManager
diff --git a/src/graphics/stk_texture.cpp b/src/graphics/stk_texture.cpp
index 6139ca850..593f3f000 100644
--- a/src/graphics/stk_texture.cpp
+++ b/src/graphics/stk_texture.cpp
@@ -18,10 +18,12 @@
#include "graphics/stk_texture.hpp"
#include "config/user_config.hpp"
#include "graphics/central_settings.hpp"
+#include "graphics/hq_mipmap_generator.hpp"
#include "graphics/irr_driver.hpp"
#include "graphics/material.hpp"
#include "graphics/material_manager.hpp"
#include "graphics/materials.hpp"
+#include "graphics/stk_tex_manager.hpp"
#include "modes/profile_world.hpp"
#include "utils/log.hpp"
#include "utils/string_utils.hpp"
@@ -33,23 +35,27 @@
static const uint8_t CACHE_VERSION = 1;
#endif
// ----------------------------------------------------------------------------
-STKTexture::STKTexture(const std::string& path, bool srgb, bool premul_alpha,
- bool set_material, bool mesh_tex, bool no_upload,
- bool single_channel)
- : video::ITexture(path.c_str()), m_texture_handle(0), m_srgb(srgb),
- m_premul_alpha(premul_alpha), m_mesh_texture(mesh_tex),
- m_single_channel(single_channel), m_material(NULL),
- m_texture_name(0), m_texture_size(0), m_texture_image(NULL)
+STKTexture::STKTexture(const std::string& path, TexConfig* tc, bool no_upload)
+ : video::ITexture(path.c_str()), m_texture_handle(0),
+ m_single_channel(false), m_tex_config(NULL), m_material(NULL),
+ m_texture_name(0), m_texture_size(0), m_texture_image(NULL),
+ m_file(NULL), m_img_loader(NULL)
{
- if (set_material)
+ if (tc != NULL)
{
- m_material = material_manager->getMaterialFor(this);
- m_mesh_texture = true;
+ m_tex_config = (TexConfig*)malloc(sizeof(TexConfig));
+ memcpy(m_tex_config, tc, sizeof(TexConfig));
+ m_single_channel = m_tex_config->m_colorization_mask;
+ if (m_tex_config->m_set_material)
+ m_material = material_manager->getMaterialFor(this);
}
-
#ifndef SERVER_ONLY
- if (!CVS->isGLSL())
- m_srgb = false;
+ if (m_tex_config && !CVS->isGLSL())
+ m_tex_config->m_srgb = false;
+#ifdef USE_GLES2
+ if (m_tex_config && !CVS->isDefferedEnabled())
+ m_tex_config->m_srgb = false;
+#endif
if (!CVS->isARBTextureSwizzleUsable())
m_single_channel = false;
#endif
@@ -58,24 +64,25 @@ STKTexture::STKTexture(const std::string& path, bool srgb, bool premul_alpha,
// ----------------------------------------------------------------------------
+/** Creates a square texture of \p size x \p size pixels from raw data.
+ *  No TexConfig is attached (m_tex_config stays NULL).
+ *  \param data Pixel data forwarded to reload() as preload data.
+ *  \param name Name given to the underlying video::ITexture.
+ *  \param size Used for both the width and the height.
+ *  \param single_channel Initial value of m_single_channel.
+ *  \param delete_ttl When true, reload() is not called, so nothing is
+ *         uploaded. NOTE(review): presumably used for the "delete_ttl"
+ *         marker texture that ThreadedTexLoader::startRoutine() checks
+ *         for by name -- confirm against the caller.
+ */
STKTexture::STKTexture(uint8_t* data, const std::string& name, size_t size,
- bool single_channel)
- : video::ITexture(name.c_str()), m_texture_handle(0), m_srgb(false),
- m_premul_alpha(false), m_mesh_texture(false),
- m_single_channel(single_channel), m_material(NULL),
- m_texture_name(0), m_texture_size(0), m_texture_image(NULL)
+ bool single_channel, bool delete_ttl)
+ : video::ITexture(name.c_str()), m_texture_handle(0),
+ m_single_channel(single_channel), m_tex_config(NULL),
+ m_material(NULL), m_texture_name(0), m_texture_size(0),
+ m_texture_image(NULL), m_file(NULL), m_img_loader(NULL)
{
m_size.Width = size;
m_size.Height = size;
m_orig_size = m_size;
- reload(false/*no_upload*/, data);
+ // Skip the reload entirely when delete_ttl is set.
+ if (!delete_ttl)
+ reload(false/*no_upload*/, data);
} // STKTexture
// ----------------------------------------------------------------------------
+/** Creates a texture from an already decoded image. No TexConfig is
+ *  attached (m_tex_config stays NULL) and m_single_channel is false.
+ *  \param img Image forwarded to reload() as the preloaded image.
+ *  \param name Name given to the underlying video::ITexture.
+ */
STKTexture::STKTexture(video::IImage* img, const std::string& name)
- : video::ITexture(name.c_str()), m_texture_handle(0), m_srgb(false),
- m_premul_alpha(false), m_mesh_texture(false),
- m_single_channel(false), m_material(NULL), m_texture_name(0),
- m_texture_size(0), m_texture_image(NULL)
+ : video::ITexture(name.c_str()), m_texture_handle(0),
+ m_single_channel(false), m_tex_config(NULL), m_material(NULL),
+ m_texture_name(0), m_texture_size(0), m_texture_image(NULL),
+ m_file(NULL), m_img_loader(NULL)
{
reload(false/*no_upload*/, NULL/*preload_data*/, img);
} // STKTexture
@@ -92,6 +99,7 @@ STKTexture::~STKTexture()
#endif // !SERVER_ONLY
if (m_texture_image != NULL)
m_texture_image->drop();
+ free(m_tex_config);
} // ~STKTexture
// ----------------------------------------------------------------------------
@@ -108,11 +116,10 @@ void STKTexture::reload(bool no_upload, uint8_t* preload_data,
return;
}
#ifndef SERVER_ONLY
- irr_driver->getDevice()->getLogger()->setLogLevel(ELL_NONE);
std::string compressed_texture;
#if !defined(USE_GLES2)
- if (!no_upload && m_mesh_texture && CVS->isTextureCompressionEnabled())
+ if (!no_upload && isMeshTexture() && CVS->isTextureCompressionEnabled())
{
std::string orig_file = NamedPath.getPtr();
@@ -157,77 +164,75 @@ void STKTexture::reload(bool no_upload, uint8_t* preload_data,
uint8_t* data = preload_data;
if (data == NULL)
{
- orig_img = preload_img ? preload_img :
- irr_driver->getVideoDriver()->createImageFromFile(NamedPath);
- if (orig_img == NULL)
+ if (preload_img)
+ orig_img = preload_img;
+ else
{
- return;
- }
-
- if (orig_img->getDimension().Width == 0 ||
- orig_img->getDimension().Height == 0)
- {
- orig_img->drop();
- return;
+ m_file = irr_driver->getDevice()->getFileSystem()
+ ->createAndOpenFile(NamedPath);
+ if (m_file == NULL)
+ return;
+ irr_driver->getVideoDriver()->createImageFromFile(m_file,
+ &m_img_loader);
+ if (m_img_loader == NULL)
+ return;
+ m_file->seek(0);
+ m_orig_size = m_img_loader->getImageSize(m_file);
+ if ((!m_material || m_material->getAlphaMask().empty()) &&
+ useThreadedLoading() && !no_upload)
+ {
+ if (m_orig_size.Width == 0 || m_orig_size.Height == 0)
+ {
+ m_file->drop();
+ m_file = NULL;
+ m_img_loader = NULL;
+ return;
+ }
+ }
+ else
+ {
+ orig_img = m_img_loader->loadImage(m_file);
+ m_file->drop();
+ m_file = NULL;
+ if (orig_img == NULL || orig_img->getDimension().Width == 0 ||
+ orig_img->getDimension().Height == 0)
+ {
+ if (orig_img)
+ orig_img->drop();
+ return;
+ }
+ m_img_loader = NULL;
+ }
}
orig_img = resizeImage(orig_img, &m_orig_size, &m_size);
applyMask(orig_img);
- data = (uint8_t*)orig_img->lock();
- if (m_single_channel)
+ data = orig_img ? (uint8_t*)orig_img->lock() : NULL;
+ if (m_single_channel && !useThreadedLoading())
{
- uint8_t* sc = new uint8_t[m_size.Width * m_size.Height];
- for (unsigned int i = 0; i < m_size.Width * m_size.Height; i++)
- sc[i] = data[4 * i + 3];
+ data = singleChannelConversion(data);
orig_img->unlock();
orig_img->drop();
orig_img = NULL;
- data = sc;
}
}
const unsigned int w = m_size.Width;
const unsigned int h = m_size.Height;
unsigned int format = m_single_channel ? GL_RED : GL_BGRA;
- unsigned int internal_format = m_single_channel ? GL_R8 : GL_RGBA;
+ unsigned int internal_format = m_single_channel ? GL_R8 : isSrgb() ?
+ GL_SRGB8_ALPHA8 : GL_RGBA8;
#if !defined(USE_GLES2)
- if (m_mesh_texture && CVS->isTextureCompressionEnabled())
+ if (isMeshTexture() && CVS->isTextureCompressionEnabled())
{
internal_format = m_single_channel ? GL_COMPRESSED_RED_RGTC1 :
- m_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT :
+ isSrgb() ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT :
GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
}
- else
- {
- internal_format =
- m_single_channel ? GL_R8 : m_srgb ? GL_SRGB_ALPHA : GL_RGBA;
- }
#endif
-#if defined(USE_GLES2)
- if (!CVS->isEXTTextureFormatBGRA8888Usable() && !m_single_channel)
- {
- format = GL_RGBA;
- for (unsigned int i = 0; i < w * h; i++)
- {
- uint8_t tmp_val = data[i * 4];
- data[i * 4] = data[i * 4 + 2];
- data[i * 4 + 2] = tmp_val;
- }
- }
-#endif
- if (m_premul_alpha && !m_single_channel)
- {
- for (unsigned int i = 0; i < w * h; i++)
- {
- float alpha = data[4 * i + 3];
- if (alpha > 0.0f)
- alpha = pow(alpha / 255.f, 1.f / 2.2f);
- data[i * 4] = (uint8_t)(data[i * 4] * alpha);
- data[i * 4 + 1] = (uint8_t)(data[i * 4 + 1] * alpha);
- data[i * 4 + 2] = (uint8_t)(data[i * 4 + 2] * alpha);
- }
- }
+ if (!useThreadedLoading())
+ formatConversion(data, &format, w, h);
if (!no_upload)
{
@@ -246,17 +251,35 @@ void STKTexture::reload(bool no_upload, uint8_t* preload_data,
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_ONE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_RED);
}
- glTexImage2D(GL_TEXTURE_2D, 0, internal_format, w, h, 0, format,
- GL_UNSIGNED_BYTE, data);
+ if (useThreadedLoading())
+ {
+ int levels = 1;
+ int width = w;
+ int height = h;
+ while (true)
+ {
+ width = width < 2 ? 1 : width >> 1;
+ height = height < 2 ? 1 : height >> 1;
+ levels++;
+ if (width == 1 && height == 1)
+ break;
+ }
+ glTexStorage2D(GL_TEXTURE_2D, levels, internal_format, w, h);
+ }
+ else
+ {
+ glTexImage2D(GL_TEXTURE_2D, 0, internal_format, w, h, 0, format,
+ GL_UNSIGNED_BYTE, data);
+ }
}
- else
+ else if (!useThreadedLoading())
{
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, format,
GL_UNSIGNED_BYTE, data);
}
if (orig_img)
orig_img->unlock();
- if (hasMipMaps())
+ if (!useThreadedLoading() && hasMipMaps())
glGenerateMipmap(GL_TEXTURE_2D);
}
@@ -273,54 +296,74 @@ void STKTexture::reload(bool no_upload, uint8_t* preload_data,
if (!no_upload)
glBindTexture(GL_TEXTURE_2D, 0);
- irr_driver->getDevice()->getLogger()->setLogLevel(ELL_WARNING);
#endif // !SERVER_ONLY
} // reload
+//-----------------------------------------------------------------------------
+/** Converts 4-byte-per-pixel data in place before it is handed to GL.
+ *  Single-channel textures are left untouched. In USE_GLES2 builds bytes 0
+ *  and 2 of every pixel are swapped and GL_RGBA is reported through
+ *  \p format. If the texture uses premultiplied alpha, the first three
+ *  bytes of every pixel are scaled by the (normalised) fourth byte.
+ *  \param data Pixel buffer holding w * h pixels, modified in place.
+ *  \param format Optional out-parameter, only written in USE_GLES2 builds.
+ *  \param w Width in pixels.
+ *  \param h Height in pixels.
+ */
+void STKTexture::formatConversion(uint8_t* data, unsigned int* format,
+                                  unsigned int w, unsigned int h) const
+{
+    // Neither pass below applies to single-channel data.
+    if (m_single_channel)
+        return;
+
+    const unsigned int pixel_count = w * h;
+#if defined(USE_GLES2)
+    if (format != NULL)
+        *format = GL_RGBA;
+    for (unsigned int p = 0; p < pixel_count; p++)
+    {
+        uint8_t* px = data + p * 4;
+        const uint8_t first = px[0];
+        px[0] = px[2];
+        px[2] = first;
+    }
+#endif
+    if (!isPremulAlpha())
+        return;
+
+    for (unsigned int p = 0; p < pixel_count; p++)
+    {
+        uint8_t* px = data + p * 4;
+        float alpha = px[3];
+        if (alpha > 0.0f)
+        {
+            alpha /= 255.0f;
+            // In USE_GLES2 builds the 1/2.2 power is only applied when
+            // deferred rendering is enabled; otherwise it always is.
+#if defined(USE_GLES2)
+            if (CVS->isDefferedEnabled())
+#endif
+                alpha = pow(alpha, 1.0f / 2.2f);
+        }
+        for (unsigned int c = 0; c < 3; c++)
+            px[c] = (uint8_t)(px[c] * alpha);
+    }
+} // formatConversion
+
// ----------------------------------------------------------------------------
video::IImage* STKTexture::resizeImage(video::IImage* orig_img,
- core::dimension2du* new_img_size,
- core::dimension2du* new_tex_size)
+ core::dimension2du* orig_size,
+ core::dimension2du* final_size) const
{
video::IImage* image = orig_img;
#ifndef SERVER_ONLY
- const core::dimension2du& old_size = image->getDimension();
- core::dimension2du img_size = old_size;
-
- const float ratio = float(img_size.Width) / float(img_size.Height);
- const unsigned int drv_max_size =
- irr_driver->getVideoDriver()->getMaxTextureSize().Width;
-
- if ((img_size.Width > drv_max_size) && (ratio >= 1.0f))
- {
- img_size.Width = drv_max_size;
- img_size.Height = (unsigned)(drv_max_size / ratio);
- }
- else if (img_size.Height > drv_max_size)
- {
- img_size.Height = drv_max_size;
- img_size.Width = (unsigned)(drv_max_size * ratio);
- }
-
- if (img_size != old_size)
- {
- video::IImage* new_img = irr_driver->getVideoDriver()
- ->createImage(video::ECF_A8R8G8B8, img_size);
- image->copyToScaling(new_img);
- image->drop();
- image = new_img;
- }
-
+ if (image == NULL)
+ assert(orig_size && orig_size->Width > 0 && orig_size->Height > 0);
+ core::dimension2du img_size = image ? image->getDimension() : *orig_size;
core::dimension2du tex_size = img_size.getOptimalSize
(!irr_driver->getVideoDriver()->queryFeature(video::EVDF_TEXTURE_NPOT));
const core::dimension2du& max_size = irr_driver->getVideoDriver()
->getDriverAttributes().getAttributeAsDimension2d("MAX_TEXTURE_SIZE");
- if (max_size.Width > 0 && tex_size.Width > max_size.Width)
+ if (tex_size.Width > max_size.Width)
tex_size.Width = max_size.Width;
- if (max_size.Height > 0 && tex_size.Height > max_size.Height)
+ if (tex_size.Height > max_size.Height)
tex_size.Height = max_size.Height;
+ if (orig_size && final_size)
+ {
+ *orig_size = img_size;
+ *final_size = tex_size;
+ }
+ if (image == NULL)
+ return NULL;
+
if (image->getColorFormat() != video::ECF_A8R8G8B8 ||
tex_size != img_size)
{
@@ -334,11 +377,6 @@ video::IImage* STKTexture::resizeImage(video::IImage* orig_img,
image = new_texture;
}
- if (new_img_size && new_tex_size)
- {
- *new_img_size = img_size;
- *new_tex_size = tex_size;
- }
#endif // !SERVER_ONLY
return image;
} // resizeImage
@@ -548,3 +586,101 @@ void STKTexture::unloadHandle()
}
#endif
} // unloadHandle
+
+//-----------------------------------------------------------------------------
+/** Tells whether this texture is loaded through the threaded loader.
+ *  Always false in SERVER_ONLY builds. Otherwise all of the following
+ *  must hold: the driver supports threaded texture loading, texture
+ *  compression is disabled, this is a mesh texture, and an image loader
+ *  is set that itself supports threaded loading.
+ */
+bool STKTexture::useThreadedLoading() const
+{
+#ifndef SERVER_ONLY
+    if (!CVS->supportsThreadedTextureLoading())
+        return false;
+    if (CVS->isTextureCompressionEnabled())
+        return false;
+    if (!isMeshTexture())
+        return false;
+    if (m_img_loader == NULL)
+        return false;
+    return m_img_loader->supportThreadedLoading();
+#else
+    return false;
+#endif
+} // useThreadedLoading
+
+//-----------------------------------------------------------------------------
+/** Decodes the image from m_file with m_img_loader, converts it and copies
+ *  m_texture_size bytes of the result into \p ptr. Compiled to an empty
+ *  function in SERVER_ONLY and USE_GLES2 builds.
+ *  \param ptr Destination buffer; must have room for m_texture_size bytes.
+ *  \param param Cast to STKTexManager* and only used when useHQMipmap() is
+ *         true, to queue the HQMipmapGenerator created here.
+ *  NOTE(review): the result of loadImage() is not checked for NULL before
+ *  it is locked.
+ */
+void STKTexture::threadedReload(void* ptr, void* param) const
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ video::IImage* orig_img =
+ m_img_loader->loadImage(m_file, true/*skip_checking*/);
+ orig_img = resizeImage(orig_img);
+ uint8_t* data = (uint8_t*)orig_img->lock();
+ if (m_single_channel)
+ {
+ // data now points to a separately new[]-allocated buffer, so the
+ // image itself is no longer needed.
+ data = singleChannelConversion(data);
+ orig_img->unlock();
+ orig_img->drop();
+ orig_img = NULL;
+ }
+ formatConversion(data, NULL, m_size.Width, m_size.Height);
+ memcpy(ptr, data, m_texture_size);
+
+ if (orig_img)
+ {
+ // data is still used (or delete[]'d) below; presumably
+ // setDeleteMemory(false) keeps drop() from freeing it -- TODO confirm.
+ orig_img->unlock();
+ orig_img->setDeleteMemory(false);
+ orig_img->drop();
+ }
+ if (useHQMipmap())
+ {
+ // data is passed on to the generator, which is queued for loading.
+ HQMipmapGenerator* hqmg = new HQMipmapGenerator(NamedPath, data,
+ m_size, m_texture_name, m_tex_config);
+ ((STKTexManager*)(param))->addThreadedLoadTexture(hqmg);
+ }
+ else
+ delete[] data;
+#endif
+} // threadedReload
+
+//-----------------------------------------------------------------------------
+/** Uploads level 0 of this texture with glTexSubImage2D and, unless HQ
+ *  mipmaps are used, regenerates mipmaps when the texture has them.
+ *  Compiled to an empty function in SERVER_ONLY and USE_GLES2 builds.
+ *  \param ptr Forwarded unchanged as the pixel-data argument of
+ *         glTexSubImage2D. NOTE(review): ThreadedTexLoader passes a byte
+ *         offset here, so a pixel unpack buffer is presumably bound --
+ *         confirm against the caller.
+ */
+void STKTexture::threadedSubImage(void* ptr) const
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+    const GLenum pixel_format = m_single_channel ? GL_RED : GL_BGRA;
+    glBindTexture(GL_TEXTURE_2D, m_texture_name);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_size.Width, m_size.Height,
+        pixel_format, GL_UNSIGNED_BYTE, ptr);
+    // With HQ mipmaps the lower levels are not generated here.
+    if (!useHQMipmap() && hasMipMaps())
+        glGenerateMipmap(GL_TEXTURE_2D);
+#endif
+} // threadedSubImage
+
+//-----------------------------------------------------------------------------
+/** Releases the file opened for threaded loading: drops m_file and resets
+ *  both m_file and m_img_loader to NULL. Requires m_file to be set
+ *  (asserted). Compiled to an empty function in SERVER_ONLY and USE_GLES2
+ *  builds.
+ */
+void STKTexture::cleanThreadedLoader()
+{
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+ assert(m_file);
+ m_file->drop();
+ m_file = NULL;
+ // Only the pointer is cleared; the loader object itself is not released
+ // here.
+ m_img_loader = NULL;
+#endif
+} // cleanThreadedLoader
+
+//-----------------------------------------------------------------------------
+/** Returns true when the HQ mipmap generator should be used: the texture
+ *  is not single-channel, UserConfigParams::m_hq_mipmap is enabled and
+ *  both dimensions are larger than one pixel.
+ */
+bool STKTexture::useHQMipmap() const
+{
+    if (m_single_channel)
+        return false;
+    if (!UserConfigParams::m_hq_mipmap)
+        return false;
+    return m_size.Width > 1 && m_size.Height > 1;
+} // useHQMipmap
+
+//-----------------------------------------------------------------------------
+/** Returns the m_srgb flag of the attached TexConfig, or false when no
+ *  TexConfig is attached.
+ */
+bool STKTexture::isSrgb() const
+{
+    if (m_tex_config == NULL)
+        return false;
+    return m_tex_config->m_srgb;
+} // isSrgb
+
+//-----------------------------------------------------------------------------
+/** Returns the m_premul_alpha flag of the attached TexConfig, or false
+ *  when no TexConfig is attached.
+ */
+bool STKTexture::isPremulAlpha() const
+{
+    if (m_tex_config == NULL)
+        return false;
+    return m_tex_config->m_premul_alpha;
+} // isPremulAlpha
+
+//-----------------------------------------------------------------------------
+/** Returns the m_mesh_tex flag of the attached TexConfig, or false when
+ *  no TexConfig is attached.
+ */
+bool STKTexture::isMeshTexture() const
+{
+    if (m_tex_config == NULL)
+        return false;
+    return m_tex_config->m_mesh_tex;
+} // isMeshTexture
diff --git a/src/graphics/stk_texture.hpp b/src/graphics/stk_texture.hpp
index 34be33cbc..3eb09c0b2 100644
--- a/src/graphics/stk_texture.hpp
+++ b/src/graphics/stk_texture.hpp
@@ -24,8 +24,15 @@
#include
#include
+namespace irr
+{
+ namespace io { class IReadFile; }
+ namespace video { class IImageLoader; }
+}
+
using namespace irr;
+struct TexConfig;
class Material;
class STKTexture : public video::ITexture, NoCopy
@@ -35,7 +42,9 @@ private:
uint64_t m_texture_handle;
- bool m_srgb, m_premul_alpha, m_mesh_texture, m_single_channel;
+ bool m_single_channel;
+
+ TexConfig* m_tex_config;
Material* m_material;
@@ -45,26 +54,44 @@ private:
video::IImage* m_texture_image;
+ io::IReadFile* m_file;
+
+ video::IImageLoader* m_img_loader;
+
// ------------------------------------------------------------------------
video::IImage* resizeImage(video::IImage* orig_img,
- core::dimension2du* new_img_size = NULL,
- core::dimension2du* new_tex_size = NULL);
+ core::dimension2du* orig_size = NULL,
+ core::dimension2du* final_size = NULL) const;
// ------------------------------------------------------------------------
void applyMask(video::IImage* orig_img);
// ------------------------------------------------------------------------
bool loadCompressedTexture(const std::string& file_name);
// ------------------------------------------------------------------------
void saveCompressedTexture(const std::string& file_name);
+ // ------------------------------------------------------------------------
+ void formatConversion(uint8_t* data, unsigned int* format, unsigned int w,
+ unsigned int h) const;
+ // ------------------------------------------------------------------------
+    /** Builds a one-byte-per-pixel copy of \p data by taking the fourth
+     *  byte of every 4-byte pixel.
+     *  \param data Source buffer holding m_size.Width * m_size.Height
+     *         4-byte pixels.
+     *  \return A buffer allocated with new[] holding one byte per pixel;
+     *          the caller has to delete[] it. */
+    uint8_t* singleChannelConversion(uint8_t* data) const
+    {
+        const unsigned int pixel_count = m_size.Width * m_size.Height;
+        uint8_t* const packed = new uint8_t[pixel_count];
+        for (unsigned int p = 0; p < pixel_count; p++)
+        {
+            packed[p] = data[4 * p + 3];
+        }
+        return packed;
+    }
+ // ------------------------------------------------------------------------
+ bool useHQMipmap() const;
+ // ------------------------------------------------------------------------
+ bool isSrgb() const;
+ // ------------------------------------------------------------------------
+ bool isPremulAlpha() const;
public:
// ------------------------------------------------------------------------
- STKTexture(const std::string& path, bool srgb = false,
- bool premul_alpha = false, bool set_material = false,
- bool mesh_tex = false, bool no_upload = false,
- bool single_channel = false);
+ STKTexture(const std::string& path, TexConfig* tc, bool no_upload = false);
// ------------------------------------------------------------------------
STKTexture(uint8_t* data, const std::string& name, size_t size,
- bool single_channel = false);
+ bool single_channel = false, bool delete_ttl = false);
// ------------------------------------------------------------------------
STKTexture(video::IImage* img, const std::string& name);
// ------------------------------------------------------------------------
@@ -108,20 +135,27 @@ public:
// ------------------------------------------------------------------------
virtual void unloadHandle();
// ------------------------------------------------------------------------
- bool isSrgb() const { return m_srgb; }
- // ------------------------------------------------------------------------
- bool isPremulAlpha() const { return m_premul_alpha; }
- // ------------------------------------------------------------------------
- bool isMeshTexture() const { return m_mesh_texture; }
- // ------------------------------------------------------------------------
- void setMeshTexture(bool val) { m_mesh_texture = val; }
- // ------------------------------------------------------------------------
- unsigned int getTextureSize() const { return m_texture_size; }
+ virtual unsigned int getTextureSize() const { return m_texture_size; }
// ------------------------------------------------------------------------
void reload(bool no_upload = false, uint8_t* preload_data = NULL,
video::IImage* preload_img = NULL);
// ------------------------------------------------------------------------
video::IImage* getTextureImage() { return m_texture_image; }
+ // ------------------------------------------------------------------------
+ bool useThreadedLoading() const;
+ // ------------------------------------------------------------------------
+ virtual void threadedReload(void* ptr, void* param) const;
+ // ------------------------------------------------------------------------
+ virtual void threadedSubImage(void* ptr) const;
+ // ------------------------------------------------------------------------
+ virtual void cleanThreadedLoader();
+ // ------------------------------------------------------------------------
+    /** Amount added to the texture manager's pending counter when this
+     *  texture is queued for threaded loading: 2 when HQ mipmaps are
+     *  used, 1 otherwise. */
+    virtual int getThreadedLoadTextureCounter() const
+    {
+        if (useHQMipmap())
+            return 2;
+        return 1;
+    }
+ // ------------------------------------------------------------------------
+ bool isMeshTexture() const;
}; // STKTexture
diff --git a/src/graphics/threaded_tex_loader.cpp b/src/graphics/threaded_tex_loader.cpp
new file mode 100644
index 000000000..564fd75e8
--- /dev/null
+++ b/src/graphics/threaded_tex_loader.cpp
@@ -0,0 +1,100 @@
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2017 SuperTuxKart-Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "graphics/threaded_tex_loader.hpp"
+#include "graphics/stk_tex_manager.hpp"
+#include "utils/string_utils.hpp"
+#include "utils/vs.hpp"
+
+#include
+#include
+
+// ----------------------------------------------------------------------------
+/** Thread entry point of a ThreadedTexLoader. Repeatedly takes the next
+ *  texture from the texture manager's threaded-load container, lets it
+ *  write its data to m_pbo_ptr + m_tex_size_loaded and records it in
+ *  m_completed_textures. The loop ends when a texture named "delete_ttl"
+ *  is found at the top of the container.
+ *  \param obj The ThreadedTexLoader this thread belongs to.
+ *  \return Always NULL.
+ */
+void* ThreadedTexLoader::startRoutine(void *obj)
+{
+ ThreadedTexLoader* ttl = (ThreadedTexLoader*)obj;
+ // Thread name: "ThrTexLoader" followed by m_pbo_offset / 1024 / 1024.
+ VS::setThreadName((std::string("ThrTexLoader") +
+ StringUtils::toString(ttl->m_pbo_offset / 1024 / 1024)).c_str());
+ while (true)
+ {
+ pthread_mutex_lock(&ttl->m_mutex);
+ bool finished = ttl->finishedLoading();
+ pthread_mutex_unlock(&ttl->m_mutex);
+ if (finished)
+ {
+ // NOTE(review): this re-polls m_finished_loading without sleeping
+ // or waiting on a condition until unlock(true) clears the flag.
+ continue;
+ }
+ pthread_mutex_lock(ttl->m_texture_queue_mutex);
+ bool waiting = ttl->m_stktm->isThreadedLoadTexturesEmpty();
+ // True when the container is empty but this loader still holds
+ // completed textures.
+ ttl->m_last_queue_ready.setAtomic(!ttl->m_completed_textures.empty() &&
+ waiting);
+ while (waiting)
+ {
+ pthread_cond_wait(ttl->m_cond_request, ttl->m_texture_queue_mutex);
+ waiting = ttl->m_stktm->isThreadedLoadTexturesEmpty();
+ }
+ irr::video::ITexture* target_tex =
+ ttl->m_stktm->getThreadedLoadTexture();
+ // A texture named "delete_ttl" is the request to terminate.
+ if (strcmp(target_tex->getName().getPtr(), "delete_ttl") == 0)
+ {
+ ttl->m_stktm->removeThreadedLoadTexture();
+ ttl->m_stktm->setThreadedLoadTextureCounter(-1);
+ pthread_mutex_unlock(ttl->m_texture_queue_mutex);
+ ttl->setCanBeDeleted();
+ return NULL;
+ }
+ assert(target_tex->getTextureSize() <= ttl->m_tex_capacity);
+ // Not enough room left for this texture: mark this loader as finished
+ // and leave the texture in the container.
+ if (target_tex->getTextureSize() + ttl->m_tex_size_loaded >
+ ttl->m_tex_capacity)
+ {
+ pthread_mutex_lock(&ttl->m_mutex);
+ ttl->setFinishLoading();
+ pthread_mutex_unlock(&ttl->m_mutex);
+ pthread_mutex_unlock(ttl->m_texture_queue_mutex);
+ continue;
+ }
+ ttl->m_stktm->removeThreadedLoadTexture();
+ pthread_mutex_unlock(ttl->m_texture_queue_mutex);
+ // The load itself runs without holding either mutex.
+ target_tex->threadedReload(ttl->m_pbo_ptr + ttl->m_tex_size_loaded,
+ ttl->m_stktm);
+ target_tex->cleanThreadedLoader();
+ ttl->m_tex_size_loaded += target_tex->getTextureSize();
+ ttl->m_completed_textures.push_back(target_tex);
+ pthread_mutex_lock(ttl->m_texture_queue_mutex);
+ ttl->m_stktm->setThreadedLoadTextureCounter(-1);
+ pthread_mutex_unlock(ttl->m_texture_queue_mutex);
+ }
+ return NULL;
+} // startRoutine
+
+// ----------------------------------------------------------------------------
+/** Calls threadedSubImage() on every texture this loader has completed
+ *  and then clears the list. The offsets passed start at m_pbo_offset and
+ *  advance by each texture's getTextureSize(). Must be called between
+ *  lock() and unlock() (asserted through m_locked).
+ */
+void ThreadedTexLoader::handleCompletedTextures()
+{
+    assert(m_locked);
+    size_t running_offset = m_pbo_offset;
+    for (size_t i = 0; i < m_completed_textures.size(); i++)
+    {
+        irr::video::ITexture* done_tex = m_completed_textures[i];
+        // The size is read before the upload, as the original did.
+        const size_t tex_bytes = done_tex->getTextureSize();
+        done_tex->threadedSubImage((void*)running_offset);
+        running_offset += tex_bytes;
+    }
+    m_completed_textures.clear();
+} // handleCompletedTextures
+
+#endif
diff --git a/src/graphics/threaded_tex_loader.hpp b/src/graphics/threaded_tex_loader.hpp
new file mode 100644
index 000000000..22bd15d08
--- /dev/null
+++ b/src/graphics/threaded_tex_loader.hpp
@@ -0,0 +1,118 @@
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2017 SuperTuxKart-Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_THREADED_TEX_LOADER_HPP
+#define HEADER_THREADED_TEX_LOADER_HPP
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "utils/can_be_deleted.hpp"
+#include "utils/no_copy.hpp"
+#include "utils/synchronised.hpp"
+#include "utils/types.hpp"
+
+#include <vector>
+
+namespace irr
+{
+ namespace video { class ITexture; }
+}
+class STKTexManager;
+
+/** Loads textures on its own thread. The thread is started in the
+ *  constructor and runs startRoutine(); the owner then brackets
+ *  handleCompletedTextures() with lock() and unlock().
+ */
+class ThreadedTexLoader : public NoCopy, public CanBeDeleted
+{
+private:
+    /** Upper bound for the summed getTextureSize() of the textures staged
+     *  by this loader between two flushes. */
+    const unsigned m_tex_capacity;
+
+    /** Offset from which handleCompletedTextures() starts counting; also
+     *  used (in MiB) to name the thread. */
+    const size_t m_pbo_offset;
+
+    /** Base address to which loaded texture data is written. */
+    uint8_t* m_pbo_ptr;
+
+    /** Protects m_finished_loading; held between lock() and unlock(). */
+    pthread_mutex_t m_mutex;
+
+    /** Mutex guarding the texture manager's threaded-load container; not
+     *  owned by this object. */
+    pthread_mutex_t* m_texture_queue_mutex;
+
+    /** Condition the thread waits on while the container is empty; not
+     *  owned by this object. */
+    pthread_cond_t* m_cond_request;
+
+    STKTexManager* m_stktm;
+
+    /** Sum of getTextureSize() of the textures loaded since the last
+     *  setFinishLoading(). */
+    unsigned m_tex_size_loaded;
+
+    pthread_t m_thread;
+
+    bool m_finished_loading, m_locked;
+
+    /** Set by the thread when the container is empty while completed
+     *  textures are still held by this loader. */
+    Synchronised<bool> m_last_queue_ready;
+
+    /** Textures loaded by the thread and not yet handled by
+     *  handleCompletedTextures(). */
+    std::vector<irr::video::ITexture*> m_completed_textures;
+
+public:
+    // ------------------------------------------------------------------------
+    static void* startRoutine(void *obj);
+    // ------------------------------------------------------------------------
+    /** Stores the parameters, initialises m_mutex and starts the thread.
+     *  \param capacity Value for m_tex_capacity.
+     *  \param offset Value for m_pbo_offset.
+     *  \param pbo_ptr Base address the thread writes texture data to.
+     *  \param mutex Mutex guarding the shared texture container.
+     *  \param cond Condition signalled when a texture is queued.
+     *  \param stktm Texture manager owning the container. */
+    ThreadedTexLoader(unsigned capacity, size_t offset, uint8_t* pbo_ptr,
+                      pthread_mutex_t* mutex, pthread_cond_t* cond,
+                      STKTexManager* stktm)
+                    : m_tex_capacity(capacity), m_pbo_offset(offset),
+                      m_pbo_ptr(pbo_ptr), m_texture_queue_mutex(mutex),
+                      m_cond_request(cond), m_stktm(stktm),
+                      m_tex_size_loaded(0), m_finished_loading(false),
+                      m_locked(false), m_last_queue_ready(false)
+    {
+        pthread_mutex_init(&m_mutex, NULL);
+        // Started last, so every member is initialised before the thread
+        // can touch it.
+        pthread_create(&m_thread, NULL, &startRoutine, this);
+    }
+    // ------------------------------------------------------------------------
+    /** Waits for the thread to end and then releases m_mutex. */
+    ~ThreadedTexLoader()
+    {
+        // Join before destroying: the thread locks m_mutex in its loop,
+        // and destroying a mutex that may still be used is undefined.
+        pthread_join(m_thread, NULL);
+        pthread_mutex_destroy(&m_mutex);
+    }
+    // ------------------------------------------------------------------------
+    bool finishedLoading() const { return m_finished_loading; }
+    // ------------------------------------------------------------------------
+    /** Marks the current batch as finished and resets the loaded-size
+     *  counter and the last-queue-ready flag. */
+    void setFinishLoading()
+    {
+        m_last_queue_ready.setAtomic(false);
+        m_finished_loading = true;
+        m_tex_size_loaded = 0;
+    }
+    // ------------------------------------------------------------------------
+    bool lastQueueReady() const { return m_last_queue_ready.getAtomic(); }
+    // ------------------------------------------------------------------------
+    void handleCompletedTextures();
+    // ------------------------------------------------------------------------
+    /** Acquires m_mutex and records that it is held. */
+    void lock()
+    {
+        pthread_mutex_lock(&m_mutex);
+        m_locked = true;
+    }
+    // ------------------------------------------------------------------------
+    /** Releases m_mutex if it was taken through lock(); does nothing
+     *  otherwise.
+     *  \param finish_it When true, m_finished_loading is cleared before
+     *         the mutex is released. */
+    void unlock(bool finish_it)
+    {
+        if (!m_locked) return;
+        m_locked = false;
+        if (finish_it)
+            m_finished_loading = false;
+        pthread_mutex_unlock(&m_mutex);
+    }
+
+};   // ThreadedTexLoader
+
+#endif
+
+#endif
diff --git a/src/guiengine/message_queue.cpp b/src/guiengine/message_queue.cpp
index 548c5a86c..2063d1e2a 100644
--- a/src/guiengine/message_queue.cpp
+++ b/src/guiengine/message_queue.cpp
@@ -20,18 +20,25 @@
#include "guiengine/message_queue.hpp"
-#include "config/user_config.hpp"
+#include "graphics/irr_driver.hpp"
#include "guiengine/engine.hpp"
-#include "guiengine/scalable_font.hpp"
#include "guiengine/skin.hpp"
+#include "utils/synchronised.hpp"
+#include "utils/translation.hpp"
#include "IGUIElement.h"
+#include "IGUIEnvironment.h"
+#include "IGUIStaticText.h"
using namespace GUIEngine;
namespace MessageQueue
{
+// ============================================================================
+/** The area in which the message is drawn. */
+core::recti g_area;
+// ============================================================================
/** A small helper class to store and sort messages to be displayed. */
class Message
{
@@ -45,11 +52,15 @@ private:
* or friend-message::neutral. */
std::string m_render_type;
+ /** The text label, can do linebreak if needed. */
+ gui::IGUIStaticText* m_text;
+
public:
Message(MessageQueue::MessageType mt, const core::stringw &message)
{
m_message_type = mt;
m_message = message;
+ m_text = NULL;
if(mt==MessageQueue::MT_ACHIEVEMENT)
m_render_type = "achievement-message::neutral";
else if (mt==MessageQueue::MT_ERROR)
@@ -60,6 +71,11 @@ public:
m_render_type = "friend-message::neutral";
} // Message
// ------------------------------------------------------------------------
+    /** Releases the reference that init() took on the text label. */
+    ~Message()
+    {
+        // m_text is only created by init(), which runs when a message is
+        // prepared for display. A message destroyed before that still
+        // has m_text == NULL and must not be dereferenced.
+        if (m_text != NULL)
+            m_text->drop();
+    }
/** Returns the message. */
const core::stringw & getMessage() const { return m_message; }
// ------------------------------------------------------------------------
@@ -72,6 +88,39 @@ public:
{
return m_render_type;
}
+ // ------------------------------------------------------------------------
+    /** Creates the text label for this message and computes g_area, the
+     *  rectangle in which the message is drawn. The label is created
+     *  through the GUI environment with a rectangle limited to the
+     *  screen width minus the skin's left and right borders. It is then
+     *  grabbed and removed from the GUI environment, and drawn through
+     *  draw() instead. May be called again for the same message; the
+     *  label from the previous call is released first. */
+    void init()
+    {
+        // Calling init() a second time used to overwrite m_text and leak
+        // the reference grabbed by the first call.
+        if (m_text != NULL)
+        {
+            m_text->drop();
+            m_text = NULL;
+        }
+        const GUIEngine::BoxRenderParams &brp =
+            GUIEngine::getSkin()->getBoxRenderParams(m_render_type);
+        const unsigned width = irr_driver->getActualScreenSize().Width;
+        const unsigned height = irr_driver->getActualScreenSize().Height;
+        const unsigned max_width = width - (brp.m_left_border +
+            brp.m_right_border);
+        m_text =
+            GUIEngine::getGUIEnv()->addStaticText(m_message.c_str(),
+            core::recti(0, 0, max_width, height));
+        m_text->setRightToLeft(translations->isRTLText(m_message));
+        core::dimension2du dim(m_text->getTextWidth(),
+            m_text->getTextHeight());
+        dim.Width += brp.m_left_border + brp.m_right_border;
+        // Horizontally centred, 1.5 text heights above the bottom edge.
+        int x = (width - dim.Width) / 2;
+        int y = height - int(1.5f * dim.Height);
+        g_area = irr::core::recti(x, y, x + dim.Width, y + dim.Height);
+        m_text->setRelativePosition(g_area);
+        m_text->setTextAlignment(gui::EGUIA_CENTER, gui::EGUIA_CENTER);
+        // Keep our own reference, then detach the label from the GUI
+        // environment; ~Message() drops the reference again.
+        m_text->grab();
+        m_text->remove();
+    }
+ // ------------------------------------------------------------------------
+ /** Draws the text label created by init(). init() must have been called
+  *  for this message before (asserted). */
+ void draw()
+ {
+ assert(m_text != NULL);
+ m_text->draw();
+ }
+
}; // class Message
// ============================================================================
@@ -88,11 +137,10 @@ public:
} // operator ()
}; // operator()
-
// ============================================================================
/** List of all messages. */
-std::priority_queue,
- CompareMessages> g_all_messages;
+Synchronised,
+ CompareMessages> > g_all_messages;
/** How long the current message has been displayed. The special value
* -1 indicates that a new message was added when the queue was empty. */
@@ -103,26 +151,18 @@ float g_max_display_time = 5.0f;
/** The label widget used to show the current message. */
SkinWidgetContainer *g_container = NULL;
-core::recti g_area;
// ============================================================================
+/** Prepares \p message for display: creates the shared SkinWidgetContainer
+ *  on first use, resets the display timer to 0, sets the maximum display
+ *  time to 5 seconds and calls message->init().
+ *  \param message The message to prepare; must not be NULL.
+ */
-void createLabel(const Message *message)
+void createLabel(Message *message)
{
if(!g_container)
g_container = new SkinWidgetContainer();
- gui::ScalableFont *font = GUIEngine::getFont();
- core::dimension2du dim = font->getDimension(message->getMessage().c_str());
g_current_display_time = 0.0f;
// Maybe make this time dependent on message length as well?
g_max_display_time = 5.0f;
- const GUIEngine::BoxRenderParams &brp =
- GUIEngine::getSkin()->getBoxRenderParams(message->getRenderType());
- dim.Width +=brp.m_left_border + brp.m_right_border;
- int x = (UserConfigParams::m_width - dim.Width) / 2;
- int y = UserConfigParams::m_height - int(1.5f*dim.Height);
- g_area = irr::core::recti(x, y, x+dim.Width, y+dim.Height);
+ // The label creation and the g_area computation now live in
+ // Message::init().
+ message->init();
} // createLabel
// ----------------------------------------------------------------------------
@@ -130,9 +170,16 @@ void createLabel(const Message *message)
* position of the message. */
void updatePosition()
{
- if (g_all_messages.empty()) return;
- Message *last = g_all_messages.top();
+ // The queue stays locked from the emptiness check until createLabel()
+ // has finished with the top message.
+ g_all_messages.lock();
+ bool empty = g_all_messages.getData().empty();
+ if (empty)
+ {
+ // Nothing queued: release the lock before the early return.
+ g_all_messages.unlock();
+ return;
+ }
+ Message *last = g_all_messages.getData().top();
createLabel(last);
+ g_all_messages.unlock();
} // updatePosition
// ----------------------------------------------------------------------------
@@ -143,13 +190,15 @@ void updatePosition()
void add(MessageType mt, const irr::core::stringw &message)
{
Message *m = new Message(mt, message);
- if(g_all_messages.empty())
+ // Lock the queue while it is inspected and modified.
+ g_all_messages.lock();
+ if (g_all_messages.getData().empty())
{
// Indicate that there is a new message, which needs
// a new label etc. to be computed.
g_current_display_time =-1.0f;
}
- g_all_messages.push(m);
+ // The queue now holds the pointer; update() deletes messages once
+ // their display time has expired.
+ g_all_messages.getData().push(m);
+ g_all_messages.unlock();
} // add
// ----------------------------------------------------------------------------
@@ -161,32 +210,38 @@ void add(MessageType mt, const irr::core::stringw &message)
*/
void update(float dt)
{
- if(g_all_messages.empty()) return;
+ // Take the lock once and keep it until the current message has been
+ // chosen; every early return below releases it first.
+ g_all_messages.lock();
+ if (g_all_messages.getData().empty())
+ {
+ g_all_messages.unlock();
+ return;
+ }
g_current_display_time += dt;
- if(g_current_display_time > g_max_display_time)
+ if (g_current_display_time > g_max_display_time)
{
- Message *last = g_all_messages.top();
- g_all_messages.pop();
+ // The top message has been shown long enough: remove and free it.
+ Message *last = g_all_messages.getData().top();
+ g_all_messages.getData().pop();
delete last;
- if(g_all_messages.empty()) return;
+ if (g_all_messages.getData().empty())
+ {
+ g_all_messages.unlock();
+ return;
+ }
+ // A negative time marks that the next message still needs its label.
g_current_display_time = -1.0f;
}
+ Message *current = g_all_messages.getData().top();
// Create new data for the display.
- if(g_current_display_time < 0)
+ if (g_current_display_time < 0)
{
- createLabel(g_all_messages.top());
+ createLabel(current);
}
+ g_all_messages.unlock();
- Message *current = g_all_messages.top();
+ // NOTE(review): 'current' is used after the unlock above; this relies on
+ // update() being the only place that pops/deletes messages -- confirm.
+ // NOTE(review): the definition of g_area is removed earlier in this
+ // patch, yet it is still referenced below -- confirm this compiles.
GUIEngine::getSkin()->drawMessage(g_container, g_area,
current->getRenderType());
- gui::ScalableFont *font = GUIEngine::getFont();
-
- video::SColor color(255, 0, 0, 0);
- font->draw(current->getMessage(), g_area, color, true, true);
-
+ current->draw();
+
} // update
} // namespace GUIEngine
diff --git a/src/guiengine/widgets/icon_button_widget.cpp b/src/guiengine/widgets/icon_button_widget.cpp
index 730710f4d..379af20fe 100644
--- a/src/guiengine/widgets/icon_button_widget.cpp
+++ b/src/guiengine/widgets/icon_button_widget.cpp
@@ -342,9 +342,7 @@ video::ITexture* IconButtonWidget::getDeactivatedTexture(video::ITexture* textur
name += "_disabled";
STKTexManager* stkm = STKTexManager::getInstance();
STKTexture* disabled_stk_tex = static_cast(stkm->getTexture
- (name, false/*srgb*/, false/*premul_alpha*/, false/*set_material*/,
- false/*mesh_tex*/, false /*no_upload*/, false/*single_channel*/,
- false/*create_if_unfound*/));
+ (name, NULL/*tc*/, false /*no_upload*/, false/*create_if_unfound*/));
if (disabled_stk_tex == NULL)
{
SColor c;
diff --git a/src/input/input_manager.cpp b/src/input/input_manager.cpp
index df3308a32..9594ca1ef 100644
--- a/src/input/input_manager.cpp
+++ b/src/input/input_manager.cpp
@@ -273,8 +273,18 @@ void InputManager::handleStaticAction(int key, int value)
case KEY_PRINT:
// on windows we don't get a press event, only release. So
// save on release only (to avoid saving twice on other platforms)
- if (value ==0 )
- irr_driver->requestScreenshot();
+ if (value == 0)
+ {
+ if (control_is_pressed)
+ {
+ const bool is_recording = irr_driver->isRecording();
+ irr_driver->setRecording(!is_recording);
+ }
+ else
+ {
+ irr_driver->requestScreenshot();
+ }
+ }
break;
case KEY_F11:
if(value && shift_is_pressed && world && RewindManager::isEnabled())
diff --git a/src/main_loop.cpp b/src/main_loop.cpp
index 80b1aec28..6f97697f2 100644
--- a/src/main_loop.cpp
+++ b/src/main_loop.cpp
@@ -117,7 +117,9 @@ float MainLoop::getLimitedDt()
// Throttle fps if more than maximum, which can reduce
// the noise the fan on a graphics card makes.
// When in menus, reduce FPS much, it's not necessary to push to the maximum for plain menus
- const int max_fps = (StateManager::get()->throttleFPS() ? 30 : UserConfigParams::m_max_fps);
+ const int max_fps = (irr_driver->isRecording() &&
+ UserConfigParams::m_limit_game_fps ? UserConfigParams::m_record_fps :
+ StateManager::get()->throttleFPS() ? 60 : UserConfigParams::m_max_fps);
if (dt > 0)
{
const int current_fps = (int)(1000.0f / dt);
diff --git a/src/modes/soccer_world.cpp b/src/modes/soccer_world.cpp
index 60bcaa37c..1f660ada1 100644
--- a/src/modes/soccer_world.cpp
+++ b/src/modes/soccer_world.cpp
@@ -331,13 +331,12 @@ void SoccerWorld::initKartList()
std::string blue_path =
file_manager->getAsset(FileManager::GUI, "soccer_player_blue.png");
+ TexConfig btc(true/*srgb*/, true/*premul_alpha*/);
video::ITexture* red = STKTexManager::getInstance()->getTexture
- (red_path, true/*srgb*/, true/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ (red_path, &btc);
video::ITexture* blue = STKTexManager::getInstance()->getTexture
- (blue_path, true/*srgb*/, true/*premul_alpha*/, false/*set_material*/,
- true/*mesh_tex*/);
+ (blue_path, &btc);
//Assigning indicators
for(unsigned int i = 0; i < kart_amount; i++)
diff --git a/src/modes/three_strikes_battle.cpp b/src/modes/three_strikes_battle.cpp
index 87052c086..86f789243 100644
--- a/src/modes/three_strikes_battle.cpp
+++ b/src/modes/three_strikes_battle.cpp
@@ -179,9 +179,9 @@ void ThreeStrikesBattle::kartAdded(AbstractKart* kart, scene::ISceneNode* node)
// Add heart billboard above it
std::string heart_path =
file_manager->getAsset(FileManager::GUI, "heart.png");
+ TexConfig btc(true/*srgb*/, true/*premul_alpha*/);
video::ITexture* heart = STKTexManager::getInstance()->getTexture
- (heart_path, true/*srgb*/, true/*premul_alpha*/,
- false/*set_material*/, true/*mesh_tex*/);
+ (heart_path, &btc);
float height = kart->getKartHeight() + 0.5f;
diff --git a/src/states_screens/dialogs/custom_video_settings.cpp b/src/states_screens/dialogs/custom_video_settings.cpp
index b0435834d..b7b5a6dd5 100644
--- a/src/states_screens/dialogs/custom_video_settings.cpp
+++ b/src/states_screens/dialogs/custom_video_settings.cpp
@@ -61,8 +61,6 @@ void CustomVideoSettingsDialog::beforeAddingWidgets()
getWidget("anim_gfx")->setState(UserConfigParams::m_graphical_effects);
getWidget("weather_gfx")->setState(UserConfigParams::m_weather_effects);
getWidget("dof")->setState(UserConfigParams::m_dof);
- getWidget("hd-textures")
- ->setState((UserConfigParams::m_high_definition_textures & 0x01)==0x01);
SpinnerWidget* kart_anim = getWidget("steering_animations");
kart_anim->addLabel(_("Disabled")); // 0
@@ -79,33 +77,24 @@ void CustomVideoSettingsDialog::beforeAddingWidgets()
//I18N: Geometry level disabled : lowest level, no details
geometry_level->addLabel(_("Disabled"));
//I18N: Geometry level low : few details are displayed
- geometry_level->addLabel(_("low"));
+ geometry_level->addLabel(_("Low"));
//I18N: Geometry level high : everything is displayed
- geometry_level->addLabel(_("high"));
+ geometry_level->addLabel(_("High"));
geometry_level->setValue(
UserConfigParams::m_geometry_level == 2 ? 0 :
UserConfigParams::m_geometry_level == 0 ? 2 : 1);
- SpinnerWidget* filtering = getWidget("filtering");
- int value = 0;
- if (UserConfigParams::m_anisotropic == 2) value = 2;
- else if (UserConfigParams::m_anisotropic == 4) value = 3;
- else if (UserConfigParams::m_anisotropic == 8) value = 4;
- else if (UserConfigParams::m_anisotropic == 16) value = 5;
- else if (UserConfigParams::m_trilinear) value = 1;
- filtering->addLabel(_("Bilinear")); // 0
- filtering->addLabel(_("Trilinear")); // 1
- filtering->addLabel(_("Anisotropic x2")); // 2
- filtering->addLabel(_("Anisotropic x4")); // 3
- filtering->addLabel(_("Anisotropic x8")); // 4
- filtering->addLabel(_("Anisotropic x16")); // 5
-
- filtering->setValue(value);
+ SpinnerWidget* filtering = getWidget("image_quality");
+ filtering->addLabel(_("Very Low"));
+ filtering->addLabel(_("Low"));
+ filtering->addLabel(_("High"));
+ filtering->addLabel(_("Very High"));
+ filtering->setValue(OptionsScreenVideo::getImageQuality());
SpinnerWidget* shadows = getWidget("shadows");
shadows->addLabel(_("Disabled")); // 0
- shadows->addLabel(_("low")); // 1
- shadows->addLabel(_("high")); // 2
+ shadows->addLabel(_("Low")); // 1
+ shadows->addLabel(_("High")); // 2
if (CVS->supportsShadows())
shadows->setValue(UserConfigParams::m_shadows_resolution / 512);
else
@@ -195,11 +184,6 @@ GUIEngine::EventPropagation CustomVideoSettingsDialog::processEvent(const std::s
UserConfigParams::m_weather_effects =
getWidget("weather_gfx")->getState();
- // Set bit 0 for enabled/disabled, and set bit 1 to indicate that this
- // is now a user's choice and should not be overwritten by any default
- UserConfigParams::m_high_definition_textures =
- getWidget("hd-textures")->getState() ? 0x03 : 0x02;
-
UserConfigParams::m_show_steering_animations =
getWidget("steering_animations")->getValue();
@@ -207,33 +191,8 @@ GUIEngine::EventPropagation CustomVideoSettingsDialog::processEvent(const std::s
getWidget("geometry_detail")->getValue();
UserConfigParams::m_geometry_level = val == 2 ? 0 : val == 0 ? 2 : 1;
- switch (getWidget("filtering")->getValue())
- {
- case 0:
- UserConfigParams::m_anisotropic = 0;
- UserConfigParams::m_trilinear = false;
- break;
- case 1:
- UserConfigParams::m_anisotropic = 0;
- UserConfigParams::m_trilinear = true;
- break;
- case 2:
- UserConfigParams::m_anisotropic = 2;
- UserConfigParams::m_trilinear = true;
- break;
- case 3:
- UserConfigParams::m_anisotropic = 4;
- UserConfigParams::m_trilinear = true;
- break;
- case 4:
- UserConfigParams::m_anisotropic = 8;
- UserConfigParams::m_trilinear = true;
- break;
- case 5:
- UserConfigParams::m_anisotropic = 16;
- UserConfigParams::m_trilinear = true;
- break;
- }
+ OptionsScreenVideo::setImageQuality(getWidget
+ ("image_quality")->getValue());
user_config->saveConfig();
diff --git a/src/states_screens/dialogs/debug_slider.cpp b/src/states_screens/dialogs/debug_slider.cpp
index 464977cb3..23dd0da74 100644
--- a/src/states_screens/dialogs/debug_slider.cpp
+++ b/src/states_screens/dialogs/debug_slider.cpp
@@ -39,7 +39,6 @@ DebugSliderDialog::DebugSliderDialog() : ModalDialog(0.85f, 0.25f, MODAL_DIALOG_
loadFromFile("debug_slider.stkgui");
}
-#if !defined(__APPLE__)
void DebugSliderDialog::setSliderHook(std::string id, unsigned min, unsigned max, std::function G, std::function S)
{
getWidget(id.c_str())->setValue(G());
@@ -47,7 +46,6 @@ void DebugSliderDialog::setSliderHook(std::string id, unsigned min, unsigned max
getWidget(id.c_str())->setMax(max);
Setters[id] = S;
}
-#endif
// ------------------------------------------------------------------------------------------------------
@@ -66,7 +64,6 @@ void DebugSliderDialog::onEnterPressedInternal()
GUIEngine::EventPropagation DebugSliderDialog::processEvent(const std::string& eventSource)
{
-#if !defined(__APPLE__)
if (Setters.find(eventSource) == Setters.end())
return GUIEngine::EVENT_LET;
@@ -74,9 +71,6 @@ GUIEngine::EventPropagation DebugSliderDialog::processEvent(const std::string& e
Log::info("DebugSlider", "Value for <%s> : %i", eventSource.c_str(), value);
Setters[eventSource](value);
return GUIEngine::EVENT_BLOCK;
-#else
- return GUIEngine::EVENT_LET;
-#endif
}
// ------------------------------------------------------------------------------------------------------
diff --git a/src/states_screens/dialogs/debug_slider.hpp b/src/states_screens/dialogs/debug_slider.hpp
index 8e0768db6..36b5dba84 100644
--- a/src/states_screens/dialogs/debug_slider.hpp
+++ b/src/states_screens/dialogs/debug_slider.hpp
@@ -33,17 +33,13 @@ class DebugSliderDialog : public GUIEngine::ModalDialog
private:
std::string m_id;
-#if !defined(__APPLE__)
std::map >Setters;
-#endif
public:
DebugSliderDialog();
~DebugSliderDialog() {};
-#if !defined(__APPLE__)
void setSliderHook(std::string id, unsigned min, unsigned max, std::function G, std::function S);
-#endif
void changeLabel(std::string id, std::string new_label);
virtual void onEnterPressedInternal() OVERRIDE;
diff --git a/src/states_screens/options_screen_video.cpp b/src/states_screens/options_screen_video.cpp
index b85e5b158..765b35268 100644
--- a/src/states_screens/options_screen_video.cpp
+++ b/src/states_screens/options_screen_video.cpp
@@ -23,6 +23,7 @@
#include "graphics/central_settings.hpp"
#include "graphics/irr_driver.hpp"
#include "graphics/shared_gpu_objects.hpp"
+#include "graphics/stk_tex_manager.hpp"
#include "guiengine/screen.hpp"
#include "guiengine/widgets/button_widget.hpp"
#include "guiengine/widgets/check_box_widget.hpp"
@@ -53,32 +54,32 @@ void OptionsScreenVideo::initPresets()
({
false /* light */, 0 /* shadow */, false /* bloom */, false /* motionblur */,
false /* lightshaft */, false /* glow */, false /* mlaa */, false /* ssao */, false /* weather */,
- false /* animatedScenery */, 0 /* animatedCharacters */, 0 /* anisotropy */,
- false /* depth of field */, false /* global illumination */, true /* degraded IBL */, 0 /* hd_textures */
+ false /* animatedScenery */, 0 /* animatedCharacters */, 0 /* image_quality */,
+ false /* depth of field */, false /* global illumination */, true /* degraded IBL */
});
m_presets.push_back
({
false /* light */, 0 /* shadow */, false /* bloom */, false /* motionblur */,
false /* lightshaft */, false /* glow */, false /* mlaa */, false /* ssao */, false /* weather */,
- true /* animatedScenery */, 1 /* animatedCharacters */, 4 /* anisotropy */,
- false /* depth of field */, false /* global illumination */, true /* degraded IBL */, 0 /* hd_textures */
+ true /* animatedScenery */, 1 /* animatedCharacters */, 1 /* image_quality */,
+ false /* depth of field */, false /* global illumination */, true /* degraded IBL */
});
m_presets.push_back
({
true /* light */, 0 /* shadow */, false /* bloom */, false /* motionblur */,
false /* lightshaft */, false /* glow */, false /* mlaa */, false /* ssao */, true /* weather */,
- true /* animatedScenery */, 1 /* animatedCharacters */, 4 /* anisotropy */,
- false /* depth of field */, false /* global illumination */, true /* degraded IBL */, 1 /* hd_textures */
+ true /* animatedScenery */, 1 /* animatedCharacters */, 2 /* image_quality */,
+ false /* depth of field */, false /* global illumination */, true /* degraded IBL */
});
m_presets.push_back
({
true /* light */, 0 /* shadow */, false /* bloom */, true /* motionblur */,
true /* lightshaft */, true /* glow */, true /* mlaa */, false /* ssao */, true /* weather */,
- true /* animatedScenery */, 1 /* animatedCharacters */, 8 /* anisotropy */,
- false /* depth of field */, false /* global illumination */, false /* degraded IBL */, 1 /* hd_textures */
+ true /* animatedScenery */, 1 /* animatedCharacters */, 2 /* image_quality */,
+ false /* depth of field */, false /* global illumination */, false /* degraded IBL */
});
m_presets.push_back
@@ -91,8 +92,8 @@ void OptionsScreenVideo::initPresets()
#else
2 /* animatedCharacters */,
#endif
- 16 /* anisotropy */,
- true /* depth of field */, false /* global illumination */, false /* degraded IBL */, 1 /* hd_textures */
+ 3 /* image_quality */,
+ true /* depth of field */, false /* global illumination */, false /* degraded IBL */
});
m_presets.push_back
@@ -105,8 +106,8 @@ void OptionsScreenVideo::initPresets()
#else
2 /* animatedCharacters */,
#endif
- 16 /* anisotropy */,
- true /* depth of field */, true /* global illumination */, false /* degraded IBL */, 1 /* hd_textures */
+ 3 /* image_quality */,
+ true /* depth of field */, true /* global illumination */, false /* degraded IBL */
});
} // initPresets
@@ -139,10 +140,79 @@ struct Resolution
}
};
+// ----------------------------------------------------------------------------
+/** Maps the current texture/filtering settings onto an image quality
+ *  level. A level 0-3 is recognised only when all five settings (RTT scale
+ *  factor, trilinear, anisotropic level, bit 0 of the HD texture flag and
+ *  HQ mipmaps) match the values that setImageQuality() writes for it.
+ *  \return 0 (very low) to 3 (very high); 2 if the settings match no
+ *          level exactly. */
+int OptionsScreenVideo::getImageQuality()
+{
+ if (UserConfigParams::m_scale_rtts_factor == 0.8f &&
+ UserConfigParams::m_trilinear == false &&
+ UserConfigParams::m_anisotropic == 0 &&
+ (UserConfigParams::m_high_definition_textures & 0x01) == 0x00 &&
+ UserConfigParams::m_hq_mipmap == false)
+ return 0;
+ if (UserConfigParams::m_scale_rtts_factor == 1.0f &&
+ UserConfigParams::m_trilinear == true &&
+ UserConfigParams::m_anisotropic == 2 &&
+ (UserConfigParams::m_high_definition_textures & 0x01) == 0x00 &&
+ UserConfigParams::m_hq_mipmap == false)
+ return 1;
+ if (UserConfigParams::m_scale_rtts_factor == 1.0f &&
+ UserConfigParams::m_trilinear == true &&
+ UserConfigParams::m_anisotropic == 4 &&
+ (UserConfigParams::m_high_definition_textures & 0x01) == 0x01 &&
+ UserConfigParams::m_hq_mipmap == false)
+ return 2;
+ if (UserConfigParams::m_scale_rtts_factor == 1.0f &&
+ UserConfigParams::m_trilinear == true &&
+ UserConfigParams::m_anisotropic == 16 &&
+ (UserConfigParams::m_high_definition_textures & 0x01) == 0x01 &&
+ UserConfigParams::m_hq_mipmap == true)
+ return 3;
+ // A combination that matches no level exactly is reported as level 2.
+ return 2;
+} // getImageQuality
+
+// ----------------------------------------------------------------------------
+/** Applies one image quality level by overwriting the RTT scale factor,
+ *  trilinear flag, anisotropic level, HD texture flag and HQ mipmap flag.
+ *  \param quality Level in the range 0-3; any other value only triggers
+ *         assert(false) and no setting is written. */
+void OptionsScreenVideo::setImageQuality(int quality)
+{
+ switch (quality)
+ {
+ case 0:
+ UserConfigParams::m_scale_rtts_factor = 0.8f;
+ UserConfigParams::m_trilinear = false;
+ UserConfigParams::m_anisotropic = 0;
+ // 0x02: bit 1 set (user's choice), bit 0 clear (HD textures off).
+ UserConfigParams::m_high_definition_textures = 0x02;
+ UserConfigParams::m_hq_mipmap = false;
+ break;
+ case 1:
+ UserConfigParams::m_scale_rtts_factor = 1.0f;
+ UserConfigParams::m_trilinear = true;
+ UserConfigParams::m_anisotropic = 2;
+ UserConfigParams::m_high_definition_textures = 0x02;
+ UserConfigParams::m_hq_mipmap = false;
+ break;
+ case 2:
+ UserConfigParams::m_scale_rtts_factor = 1.0f;
+ UserConfigParams::m_trilinear = true;
+ UserConfigParams::m_anisotropic = 4;
+ // 0x03: bit 1 set (user's choice), bit 0 set (HD textures on).
+ UserConfigParams::m_high_definition_textures = 0x03;
+ UserConfigParams::m_hq_mipmap = false;
+ break;
+ case 3:
+ UserConfigParams::m_scale_rtts_factor = 1.0f;
+ UserConfigParams::m_trilinear = true;
+ UserConfigParams::m_anisotropic = 16;
+ UserConfigParams::m_high_definition_textures = 0x03;
+ UserConfigParams::m_hq_mipmap = true;
+ break;
+ default:
+ assert(false);
+ }
+} // setImageQuality
+
// ----------------------------------------------------------------------------
OptionsScreenVideo::OptionsScreenVideo() : Screen("options_video.stkgui"),
- m_prev_adv_pipline(false)
+ m_prev_adv_pipline(false),
+ m_prev_img_quality(-1)
{
m_inited = false;
initPresets();
@@ -168,6 +238,7 @@ void OptionsScreenVideo::init()
{
Screen::init();
m_prev_adv_pipline = UserConfigParams::m_dynamic_lights;
+ m_prev_img_quality = getImageQuality();
RibbonWidget* ribbon = getWidget("options_choice");
assert(ribbon != NULL);
ribbon->select( "tab_video", PLAYER_ID_GAME_MASTER );
@@ -341,7 +412,7 @@ void OptionsScreenVideo::updateGfxSlider()
{
if (m_presets[l].animatedCharacters == UserConfigParams::m_show_steering_animations &&
m_presets[l].animatedScenery == UserConfigParams::m_graphical_effects &&
- m_presets[l].anisotropy == UserConfigParams::m_anisotropic &&
+ m_presets[l].image_quality == getImageQuality() &&
m_presets[l].bloom == UserConfigParams::m_bloom &&
m_presets[l].glow == UserConfigParams::m_glow &&
m_presets[l].lights == UserConfigParams::m_dynamic_lights &&
@@ -354,8 +425,7 @@ void OptionsScreenVideo::updateGfxSlider()
m_presets[l].weather == UserConfigParams::m_weather_effects &&
m_presets[l].dof == UserConfigParams::m_dof &&
m_presets[l].global_illumination == UserConfigParams::m_gi &&
- m_presets[l].degraded_ibl == UserConfigParams::m_degraded_IBL &&
- m_presets[l].hd_textures == (UserConfigParams::m_high_definition_textures & 0x01))
+ m_presets[l].degraded_ibl == UserConfigParams::m_degraded_IBL)
{
gfx->setValue(l + 1);
found = true;
@@ -395,6 +465,19 @@ void OptionsScreenVideo::updateTooltip()
//I18N: if no kart animations are enabled
const core::stringw none = _LTR("None");
+ //I18N: in the graphical options tooltip;
+ // indicates the rendered image quality is very low
+ const core::stringw very_low = _LTR("Very Low");
+ //I18N: in the graphical options tooltip;
+ // indicates the rendered image quality is low
+ const core::stringw low = _LTR("Low");
+ //I18N: in the graphical options tooltip;
+ // indicates the rendered image quality is high
+ const core::stringw high = _LTR("High");
+ //I18N: in the graphical options tooltip;
+ // indicates the rendered image quality is very high
+ const core::stringw very_high = _LTR("Very High");
+
//I18N: in graphical options
// tooltip = tooltip + L"\n" + _("Pixel shaders: %s",
// UserConfigParams::m_pixel_shaders ? enabled : disabled);
@@ -448,9 +531,11 @@ void OptionsScreenVideo::updateTooltip()
UserConfigParams::m_gi ? enabled : disabled);
//I18N: in graphical options
- tooltip = tooltip + L"\n" + _("Use high definition textures: %s",
- (UserConfigParams::m_high_definition_textures & 0x1) == 0 ? disabled : enabled);
-
+ int quality = getImageQuality();
+ tooltip = tooltip + L"\n" + _("Rendered image quality: %s",
+ quality == 0 ? very_low : quality == 1 ? low : quality == 2 ?
+ high : very_high);
+
gfx->setTooltip(tooltip);
} // updateTooltip
@@ -518,7 +603,7 @@ void OptionsScreenVideo::eventCallback(Widget* widget, const std::string& name,
UserConfigParams::m_show_steering_animations = m_presets[level].animatedCharacters;
UserConfigParams::m_graphical_effects = m_presets[level].animatedScenery;
- UserConfigParams::m_anisotropic = m_presets[level].anisotropy;
+ setImageQuality(m_presets[level].image_quality);
UserConfigParams::m_bloom = m_presets[level].bloom;
UserConfigParams::m_glow = m_presets[level].glow;
UserConfigParams::m_dynamic_lights = m_presets[level].lights;
@@ -532,7 +617,6 @@ void OptionsScreenVideo::eventCallback(Widget* widget, const std::string& name,
UserConfigParams::m_dof = m_presets[level].dof;
UserConfigParams::m_gi = m_presets[level].global_illumination;
UserConfigParams::m_degraded_IBL = m_presets[level].degraded_ibl;
- UserConfigParams::m_high_definition_textures = 0x02 | m_presets[level].hd_textures;
updateGfxSlider();
}
@@ -563,6 +647,12 @@ void OptionsScreenVideo::tearDown()
{
if (m_prev_adv_pipline != UserConfigParams::m_dynamic_lights)
irr_driver->sameRestart();
+ else if (m_prev_img_quality != getImageQuality())
+ {
+ irr_driver->setMaxTextureSize();
+ STKTexManager::getInstance()->destroyThreadedTexLoaders();
+ STKTexManager::getInstance()->createThreadedTexLoaders();
+ }
Screen::tearDown();
// save changes when leaving screen
user_config->saveConfig();
diff --git a/src/states_screens/options_screen_video.hpp b/src/states_screens/options_screen_video.hpp
index 88444c3e9..95f966655 100644
--- a/src/states_screens/options_screen_video.hpp
+++ b/src/states_screens/options_screen_video.hpp
@@ -38,12 +38,11 @@ struct GFXPreset
bool weather;
bool animatedScenery;
int animatedCharacters;
- int anisotropy;
+ int image_quality;
/** Depth of field */
bool dof;
bool global_illumination;
bool degraded_ibl;
- int hd_textures;
};
/**
@@ -54,6 +53,7 @@ class OptionsScreenVideo : public GUIEngine::Screen, public GUIEngine::ScreenSin
{
private:
bool m_prev_adv_pipline;
+ int m_prev_img_quality;
OptionsScreenVideo();
bool m_inited;
std::vector m_presets;
@@ -81,6 +81,8 @@ public:
virtual void unloaded() OVERRIDE;
void updateGfxSlider();
+ static int getImageQuality();
+ static void setImageQuality(int quality);
};
#endif
diff --git a/src/states_screens/race_gui.cpp b/src/states_screens/race_gui.cpp
index 7ee8e0715..a052c7a92 100644
--- a/src/states_screens/race_gui.cpp
+++ b/src/states_screens/race_gui.cpp
@@ -128,8 +128,11 @@ RaceGUI::RaceGUI()
m_is_tutorial = (race_manager->getTrackName() == "tutorial");
+ // Load speedmeter texture before rendering the first frame
m_speed_meter_icon = material_manager->getMaterial("speedback.png");
+ m_speed_meter_icon->getTexture();
m_speed_bar_icon = material_manager->getMaterial("speedfore.png");
+ m_speed_bar_icon->getTexture();
//createMarkerTexture();
} // RaceGUI
@@ -401,8 +404,7 @@ void RaceGUI::drawGlobalMiniMap()
const Vec3& xyz = kart->getXYZ();
Vec3 draw_at;
track->mapPoint2MiniMap(xyz, &draw_at);
- draw_at *= UserConfigParams::m_scale_rtts_factor;
-
+
video::ITexture* icon = sta ?
irr_driver->getTexture(FileManager::GUI, "heart.png") :
kart->getKartProperties()->getMinimapIcon();
@@ -424,7 +426,7 @@ void RaceGUI::drawGlobalMiniMap()
{
Vec3 draw_at;
track->mapPoint2MiniMap(sw->getBallPosition(), &draw_at);
- draw_at *= UserConfigParams::m_scale_rtts_factor;
+
video::ITexture* icon =
irr_driver->getTexture(FileManager::GUI, "soccer_ball_normal.png");
diff --git a/src/states_screens/race_gui_overworld.cpp b/src/states_screens/race_gui_overworld.cpp
index bff708a30..d72e5825c 100644
--- a/src/states_screens/race_gui_overworld.cpp
+++ b/src/states_screens/race_gui_overworld.cpp
@@ -395,8 +395,7 @@ void RaceGUIOverworld::drawGlobalMiniMap()
kart_xyz= kart->getXYZ();
Vec3 draw_at;
track->mapPoint2MiniMap(kart_xyz, &draw_at);
- draw_at *= UserConfigParams::m_scale_rtts_factor;
-
+
video::ITexture* icon = kart->getKartProperties()->getMinimapIcon();
core::rect source(core::position2di(0, 0), icon->getSize());
int marker_half_size = (kart->getController()->isLocalPlayerController()
@@ -433,8 +432,7 @@ void RaceGUIOverworld::drawGlobalMiniMap()
Vec3 draw_at;
track->mapPoint2MiniMap(challenges[n].m_position, &draw_at);
- draw_at *= UserConfigParams::m_scale_rtts_factor;
-
+
const ChallengeData* challenge = unlock_manager->getChallengeData(challenges[n].m_challenge_id);
const unsigned int val = challenge->getNumTrophies();
bool unlocked = (PlayerManager::getCurrentPlayer()->getPoints() >= val);
diff --git a/src/tracks/track.cpp b/src/tracks/track.cpp
index 99c454b7e..2640de231 100644
--- a/src/tracks/track.cpp
+++ b/src/tracks/track.cpp
@@ -2176,9 +2176,7 @@ void Track::handleSky(const XMLNode &xml_node, const std::string &filename)
if (CVS->isGLSL())
{
t = STKTexManager::getInstance()->getTexture(v[i],
- false/*srgb*/, false/*premul_alpha*/,
- false/*set_material*/, false/*mesh_tex*/,
- true/*no_upload*/);
+ (TexConfig*)NULL/*tex_config*/, true/*no_upload*/);
}
else
#endif // !SERVER_ONLY
@@ -2222,9 +2220,7 @@ void Track::handleSky(const XMLNode &xml_node, const std::string &filename)
if (CVS->isGLSL())
{
t = STKTexManager::getInstance()->getTexture(v[i],
- false/*srgb*/, false/*premul_alpha*/,
- false/*set_material*/, false/*mesh_tex*/,
- true/*no_upload*/);
+ (TexConfig*)NULL/*tex_config*/, true/*no_upload*/);
}
else
#endif // !SERVER_ONLY
diff --git a/src/tracks/track_object_presentation.cpp b/src/tracks/track_object_presentation.cpp
index d47bf8119..7a2914171 100644
--- a/src/tracks/track_object_presentation.cpp
+++ b/src/tracks/track_object_presentation.cpp
@@ -862,9 +862,9 @@ TrackObjectPresentationBillboard::TrackObjectPresentationBillboard(
xml_node.get("start", &m_fade_out_start);
xml_node.get("end", &m_fade_out_end );
}
+ TexConfig tc(true/*srgb*/, true/*premul_alpha*/);
video::ITexture* texture = STKTexManager::getInstance()->getTexture
- (file_manager->searchTexture(texture_name), true/*srgb*/,
- true/*premul_alpha*/, false/*set_material*/, true/*mesh_tex*/);
+ (file_manager->searchTexture(texture_name), &tc);
if (texture == NULL)
{
diff --git a/src/utils/avi_writer.cpp b/src/utils/avi_writer.cpp
new file mode 100644
index 000000000..b6348e413
--- /dev/null
+++ b/src/utils/avi_writer.cpp
@@ -0,0 +1,625 @@
+//
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2015 Dawid Gan
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "utils/avi_writer.hpp"
+#include "config/user_config.hpp"
+#include "graphics/irr_driver.hpp"
+#include "guiengine/message_queue.hpp"
+#include "utils/translation.hpp"
+#include "utils/vs.hpp"
+
+#include
+#include
+
+Synchronised<std::string> AVIWriter::m_recording_target("");
+// ----------------------------------------------------------------------------
+AVIWriter::AVIWriter() : m_idle(true) // Sets up the capture state, the three PBOs and the worker thread.
+{
+ resetFrameBufferImage();
+ resetCaptureFormat();
+ m_file = NULL; // no file yet; startRoutine() creates one for the first frame it receives
+ m_last_junk_chunk = 0;
+ m_end_of_header = 0;
+ m_movi_start = 0;
+ m_stream_bytes = 0;
+ m_total_frames = 0;
+ m_chunk_fcc = 0;
+ m_width = irr_driver->getActualScreenSize().Width; // the frame size is sampled once, here
+ m_height = irr_driver->getActualScreenSize().Height;
+ glGenBuffers(3, m_pbo);
+ for (int i = 0; i < 3; i++)
+ {
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo[i]);
+ glBufferData(GL_PIXEL_PACK_BUFFER, m_width * m_height * 4, NULL, // 4 bytes per pixel
+ GL_STREAM_READ);
+ }
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ pthread_cond_init(&m_cond_request, NULL);
+ pthread_create(&m_thread, NULL, &startRoutine, this); // the worker receives this object as its argument
+} // AVIWriter
+
+// ----------------------------------------------------------------------------
+AVIWriter::~AVIWriter() // Stops the worker thread and releases the GL and pthread resources.
+{
+ glDeleteBuffers(3, m_pbo);
+ addFrameBufferImage(NULL, 0); // a NULL image with a count other than -1 makes startRoutine() return
+ if (!waitForReadyToDeleted(2.0f))
+ Log::info("AVIWriter", "AVIWriter not stopping, exiting anyway.");
+ pthread_join(m_thread, NULL);
+ pthread_cond_destroy(&m_cond_request);
+} // ~AVIWriter
+
+// ----------------------------------------------------------------------------
+void AVIWriter::resetFrameBufferImage() // Restarts the PBO rotation and clears the frame-timing accumulators.
+{
+ m_pbo_use = 0; // captureFrameBufferImage() queues nothing until this has reached 3 again
+ m_accumulated_time = 0.0f;
+ m_remaining_time = 0.0f;
+} // resetFrameBufferImage
+
+// ----------------------------------------------------------------------------
+void AVIWriter::resetCaptureFormat() // Reloads JPEG quality, frame period and frame format from the user configuration.
+{
+ m_img_quality = UserConfigParams::m_record_compression;
+ m_msec_per_frame = unsigned(1000 / UserConfigParams::m_record_fps); // NOTE(review): divides by the configured fps - confirm it can never be 0
+ m_avi_format =
+ UserConfigParams::m_record_bmp ? AVI_FORMAT_BMP : AVI_FORMAT_JPG;
+} // resetCaptureFormat
+
+// ----------------------------------------------------------------------------
+/** Worker-thread entry point: pops captured frames from m_fbi_queue, packs
+ *  them to 24 bits per pixel, flips them vertically, JPEG-encodes them when
+ *  that format is selected and appends them to the AVI file.
+ *  \param obj The AVIWriter instance to serve. \return Always NULL. */
+void* AVIWriter::startRoutine(void *obj)
+{
+    VS::setThreadName("AVIWriter");
+    AVIWriter* avi_writer = (AVIWriter*)obj;
+    while (true)
+    {
+        avi_writer->m_fbi_queue.lock();
+        while (avi_writer->m_fbi_queue.getData().empty())
+        {
+            pthread_cond_wait(&avi_writer->m_cond_request,
+                avi_writer->m_fbi_queue.getMutex());
+        }
+        auto& p = avi_writer->m_fbi_queue.getData().front();
+        uint8_t* fbi = p.first;
+        int frame_count = p.second;
+        if (frame_count == -1)
+        {
+            // Stop marker: finish the current file but keep the thread alive.
+            avi_writer->closeFile();
+            avi_writer->m_idle.setAtomic(true);
+            avi_writer->m_fbi_queue.getData().pop_front();
+            avi_writer->m_fbi_queue.unlock();
+            continue;
+        }
+        else if (fbi == NULL)
+        {
+            // Exit marker queued by the destructor: close quietly and leave.
+            avi_writer->closeFile(false/*delete_file*/, true/*exiting*/);
+            avi_writer->setCanBeDeleted();
+            avi_writer->m_fbi_queue.getData().pop_front();
+            avi_writer->m_fbi_queue.unlock();
+            return NULL;
+        }
+        const bool too_slow = avi_writer->m_fbi_queue.getData().size() > 50;
+        avi_writer->m_fbi_queue.getData().pop_front();
+        avi_writer->m_fbi_queue.unlock();
+        if (too_slow)
+        {
+            MessageQueue::add(MessageQueue::MT_ERROR,
+                _("Encoding is too slow, dropping frames."));
+            delete [] fbi;
+            avi_writer->cleanAllFrameBufferImages();
+            continue;
+        }
+        if (avi_writer->m_file == NULL && !avi_writer->createFile())
+        {
+            delete [] fbi;
+            avi_writer->cleanAllFrameBufferImages();
+            continue;
+        }
+        uint8_t* orig_fbi = fbi;
+        const unsigned width = avi_writer->m_width;
+        const unsigned height = avi_writer->m_height;
+        const unsigned area = width * height;
+        // Drop every 4th byte in place, walking backwards from the last pixel
+        // so the packed 3-byte pixels end up in [area, area * 4).
+        int size = area * 4;
+        int dest = size - 3;
+        int src = size - 4;
+        for (unsigned i = 0; i < area; i++, dest -= 3, src -= 4)
+        {
+            // Source and destination only overlap for the first two copies.
+            if (i < 2)
+                memmove(fbi + dest, fbi + src, 3);
+            else
+                memcpy(fbi + dest, fbi + src, 3);
+        }
+        fbi = fbi + area;
+        const int pitch = width * 3;
+        uint8_t* p2 = fbi + (height - 1) * pitch;
+        uint8_t* tmp_buf = new uint8_t[pitch];
+        // Swap rows pairwise from both ends; an odd middle row stays put.
+        for (unsigned i = 0; i < height / 2; i++)
+        {
+            memcpy(tmp_buf, fbi, pitch);
+            memcpy(fbi, p2, pitch);
+            memcpy(p2, tmp_buf, pitch);
+            fbi += pitch;
+            p2 -= pitch;
+        }
+        delete [] tmp_buf;
+        size = area * 3;
+        if (avi_writer->m_avi_format == AVI_FORMAT_JPG)
+        {
+            uint8_t* jpg = new uint8_t[size];
+            size = avi_writer->bmpToJpg(orig_fbi + area, jpg, size);
+            delete [] orig_fbi;
+            orig_fbi = jpg;
+        }
+        // Append the same image once for every frame period that is due.
+        while (frame_count != 0)
+        {
+            AVIErrCode code = avi_writer->addImage
+                (avi_writer->m_avi_format == AVI_FORMAT_JPG ? orig_fbi :
+                orig_fbi + area, size);
+            if (code == AVI_SIZE_LIMIT_ERR)
+            {
+                avi_writer->createFile();
+                continue;
+            }
+            else if (code == AVI_IO_ERR)
+                break;
+            frame_count--;
+        }
+        delete [] orig_fbi;
+    }
+    return NULL;
+} // startRoutine
+
+// ----------------------------------------------------------------------------
+int AVIWriter::getFrameCount(float dt) // Adds dt to the timing state and returns how many video frames are due (0 = none yet).
+{
+ const float frame_rate = 0.001f * m_msec_per_frame; // despite the name: the duration of one frame in seconds
+ m_accumulated_time += dt;
+ if (m_accumulated_time < frame_rate && m_remaining_time < frame_rate)
+ {
+ return 0;
+ }
+ int frame_count = 1;
+ m_remaining_time += m_accumulated_time - frame_rate; // carry the overshoot over to later calls
+ m_accumulated_time = 0.0f;
+ while (m_remaining_time > frame_rate) // every further full period in the carry-over adds one repeat
+ {
+ frame_count++;
+ m_remaining_time -= frame_rate;
+ }
+ return frame_count;
+} // getFrameCount
+
+// ----------------------------------------------------------------------------
+/** Queues the image read back into the oldest PBO (when a frame is due) and
+ *  starts reading the current back buffer. \param dt Elapsed seconds. */
+void AVIWriter::captureFrameBufferImage(float dt)
+{
+    glReadBuffer(GL_BACK);
+    int pbo_read = -1;
+    if (m_pbo_use > 3 && m_pbo_use % 3 == 0)
+        m_pbo_use = 3;
+    const int frame_count = m_pbo_use >= 3 ? getFrameCount(dt) : 0;
+    if (frame_count != 0)
+    {
+        pbo_read = m_pbo_use % 3;
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo[pbo_read]);
+        void* ptr = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
+        if (ptr != NULL) // glMapBuffer returns NULL on failure: skip frame
+        {
+            uint8_t* fbi = new uint8_t[m_width * m_height * 4];
+            memcpy(fbi, ptr, m_width * m_height * 4);
+            glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+            addFrameBufferImage(fbi, frame_count);
+        }
+    }
+    int pbo_use = m_pbo_use++ % 3;
+    assert(pbo_read == -1 || pbo_use == pbo_read);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo[pbo_use]);
+    glReadPixels(0, 0, m_width, m_height,
+        m_avi_format == AVI_FORMAT_JPG ? GL_RGBA: GL_BGRA,
+        GL_UNSIGNED_BYTE, NULL);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+} // captureFrameBufferImage
+
+// ----------------------------------------------------------------------------
+/** Appends a JUNK chunk holding str, zero-padded to at least min_size bytes
+ *  (rounded up to an even size), and records the file position after it.
+ *  \return False, after discarding the file, if any write fails. */
+bool AVIWriter::addJUNKChunk(std::string str, unsigned int min_size)
+{
+    int size = str.size() < min_size ? min_size : str.size() + 1;
+    size = (size + 1) & 0xfffffffe;
+
+    CHUNK chunk;
+    chunk.fcc = FOURCC('J', 'U', 'N', 'K');
+    chunk.cb = size;
+    // Pad the by-value string itself, so no buffer can leak on an error path.
+    str.resize(size, '\0');
+
+    int num = fwrite(&chunk, 1, sizeof(chunk), m_file);
+    if (num != sizeof(chunk))
+        goto error;
+
+    num = fwrite(str.data(), 1, size * sizeof(char), m_file);
+    if (num != size)
+        goto error;
+
+    m_last_junk_chunk = ftell(m_file);
+    if (m_last_junk_chunk < 0)
+        goto error;
+    return true;
+
+error:
+    closeFile(true/*delete_file*/);
+    return false;
+} // addJUNKChunk
+
+// ----------------------------------------------------------------------------
+AVIErrCode AVIWriter::addImage(unsigned char* buffer, int buf_size) // Appends buf_size bytes from buffer as one frame chunk and records it for the index.
+{
+ if (m_file == NULL)
+ goto error;
+
+ int num; num = ftell(m_file); // declaration and assignment are split so the goto above does not jump over an initialisation
+ if (num < 0)
+ goto error;
+
+ if (m_total_frames >= MAX_FRAMES) // m_index_table has no room left; nothing has been written for this frame yet
+ goto size_limit;
+
+ CHUNK chunk;
+ chunk.fcc = m_chunk_fcc;
+ chunk.cb = buf_size;
+
+ m_index_table[m_total_frames].Offset = num; // position of the chunk header, counted from the start of the file
+ m_index_table[m_total_frames].Length = chunk.cb;
+ m_index_table[m_total_frames].fcc = chunk.fcc;
+
+ num = fwrite(&chunk, 1, sizeof(chunk), m_file);
+ if (num != sizeof(chunk))
+ goto error;
+
+ num = fwrite(buffer, 1, buf_size, m_file);
+ if (num != buf_size)
+ goto error;
+
+ int fill_size; fill_size = (sizeof(chunk) + buf_size) & 0x00000001; // 1 when the chunk has an odd length, else 0
+ if (fill_size > 0)
+ {
+ uint32_t filler = 0;
+ num = fwrite(&filler, 1, fill_size, m_file); // one zero byte of padding
+ if (num != fill_size)
+ goto error;
+ }
+
+ m_stream_bytes += sizeof(chunk) + buf_size + fill_size;
+ m_total_frames++;
+
+ num = ftell(m_file);
+ if (num < 0)
+ goto error;
+
+ if (((num - m_last_junk_chunk) > 20000) && (!addJUNKChunk("", 1))) // add a small JUNK chunk once more than 20000 bytes follow the previous one
+ goto error;
+
+ // Check if we reached the file size limit. NOTE(review): the frame is already in the file here, and startRoutine() writes it again after AVI_SIZE_LIMIT_ERR.
+ if (num >= MAX_FILE_SIZE)
+ goto size_limit;
+
+ return AVI_SUCCESS;
+
+error:
+ closeFile(true/*delete_file*/); // discards the file that was being written
+ return AVI_IO_ERR;
+
+size_limit:
+ MessageQueue::add(MessageQueue::MT_GENERIC,
+ _("Video exceeded size limit, starting a new one."));
+ closeFile(); // finalises the current file; creating the next one is left to the caller
+ return AVI_SIZE_LIMIT_ERR;
+} // addImage
+
+// ----------------------------------------------------------------------------
+bool AVIWriter::closeFile(bool delete_file, bool exiting) // Finalises and closes the open file, or closes and deletes it; exiting suppresses the user messages.
+{
+ if (m_file == NULL) // nothing open: report failure without touching anything
+ return false;
+
+ if (delete_file) // the shared error path below doubles as the "discard the file" path
+ goto error;
+
+ // Append the 'idx1' chunk with one entry per frame recorded in m_index_table.
+ int idx_start; idx_start = ftell(m_file);
+ if (idx_start < 0)
+ goto error;
+
+ CHUNK chunk;
+ chunk.fcc = FOURCC('i', 'd', 'x', '1');
+ chunk.cb = sizeof(AVIINDEXENTRY) * m_total_frames;
+
+ int num; num = fwrite(&chunk, 1, sizeof(chunk), m_file);
+ if (num != sizeof(chunk))
+ goto error;
+
+ for (unsigned int i = 0; i < m_total_frames; i++)
+ {
+ AVIINDEXENTRY Index;
+ Index.ckid = m_index_table[i].fcc;
+ Index.dwFlags = AVIIF_KEYFRAME; // every frame is flagged as a key frame
+ Index.dwChunkOffset = m_index_table[i].Offset;
+ Index.dwChunkLength = m_index_table[i].Length;
+
+ num = fwrite(&Index, 1, sizeof(Index), m_file);
+ if (num != sizeof(Index))
+ goto error;
+ }
+
+ // Rewrite both headers at the start of the file with the final sizes and counts.
+ if (m_total_frames > 0 && m_msec_per_frame > 0) // also guards the two divisions below
+ {
+ num = fseek(m_file, 0, SEEK_END);
+ if (num < 0)
+ goto error;
+
+ int size; size = ftell(m_file); // total file size
+ if (size < 0)
+ goto error;
+
+ num = fseek(m_file, 0, SEEK_SET);
+ if (num < 0)
+ goto error;
+
+ m_avi_hdr.riff.cb = size - sizeof(m_avi_hdr.riff);
+ m_avi_hdr.avih.dwMaxBytesPerSec = (uint32_t)
+ (((m_stream_bytes / m_total_frames) * m_format_hdr.strh.dwRate) /
+ m_msec_per_frame + 0.5f); // average bytes per frame scaled by dwRate / m_msec_per_frame
+ m_avi_hdr.avih.dwTotalFrames = m_total_frames;
+
+ num = fwrite(&m_avi_hdr, 1, sizeof(m_avi_hdr), m_file);
+ if (num != sizeof(m_avi_hdr))
+ goto error;
+
+ m_format_hdr.strh.dwLength = m_total_frames;
+
+ num = fwrite(&m_format_hdr, 1, sizeof(m_format_hdr), m_file); // follows m_avi_hdr directly, as in createFile()
+ if (num != sizeof(m_format_hdr))
+ goto error;
+ }
+
+ // Patch the size of the 'movi' list, which ends where the index starts.
+ m_movi_chunk.cb = idx_start - m_movi_start;
+
+ num = fseek(m_file, m_movi_start - sizeof(m_movi_chunk), SEEK_SET); // the list header sits right before m_movi_start
+ if (num < 0)
+ goto error;
+
+ num = fwrite(&m_movi_chunk, 1, sizeof(m_movi_chunk), m_file);
+ if (num != sizeof(m_movi_chunk))
+ goto error;
+
+ fclose(m_file);
+ m_file = NULL;
+
+ if (!exiting)
+ {
+ MessageQueue::add(MessageQueue::MT_GENERIC,
+ _("Video saved in \"%s\".", m_filename.c_str()));
+ }
+ return true;
+
+error:
+ if (!exiting)
+ {
+ MessageQueue::add(MessageQueue::MT_ERROR,
+ _("Error when saving video."));
+ }
+ fclose(m_file);
+ remove(m_filename.c_str()); // an unfinished or unwanted file is deleted from disk
+ m_file = NULL;
+ return false;
+} // closeFile
+
+// ----------------------------------------------------------------------------
+bool AVIWriter::createFile() // Opens a new timestamped .avi file and writes provisional headers up to the start of the 'movi' list.
+{
+ m_idle.setAtomic(false); // NOTE(review): stays false even if the file cannot be created below - confirm that is intended
+ time_t rawtime;
+ time(&rawtime);
+ tm* timeInfo = localtime(&rawtime);
+ char time_buffer[256];
+ sprintf(time_buffer, "%i.%02i.%02i_%02i.%02i.%02i",
+ timeInfo->tm_year + 1900, timeInfo->tm_mon + 1,
+ timeInfo->tm_mday, timeInfo->tm_hour,
+ timeInfo->tm_min, timeInfo->tm_sec);
+
+ m_filename = m_recording_target.getAtomic() + "-" + time_buffer + ".avi";
+ m_stream_bytes = 0; // per-file counters start over
+ m_total_frames = 0;
+ m_movi_start = 0;
+ m_last_junk_chunk = 0;
+
+ BitmapInfoHeader bitmap_hdr;
+ bitmap_hdr.biSize = sizeof(BitmapInfoHeader);
+ bitmap_hdr.biWidth = m_width;
+ bitmap_hdr.biHeight = m_height;
+ bitmap_hdr.biPlanes = 1;
+ bitmap_hdr.biBitCount = 24;
+ bitmap_hdr.biCompression = 0;
+ bitmap_hdr.biSizeImage = (m_width * m_height * 3 * bitmap_hdr.biPlanes);
+ bitmap_hdr.biXPelsPerMeter = 0;
+ bitmap_hdr.biYPelsPerMeter = 0;
+ bitmap_hdr.biClrUsed = 0;
+ bitmap_hdr.biClrImportant = 0;
+
+ memset(&m_avi_hdr, '\0', sizeof(m_avi_hdr));
+ m_avi_hdr.riff.fcc = FOURCC('R', 'I', 'F', 'F');
+ m_avi_hdr.riff.cb = 0; // update when finished (size of the file - 8)
+ m_avi_hdr.avi = FOURCC('A', 'V', 'I', ' ');
+ m_avi_hdr.list1.fcc = FOURCC('L', 'I', 'S', 'T');
+ m_avi_hdr.list1.cb = 0; // computed further down, after the headers have been written
+ m_avi_hdr.hdrl = FOURCC('h', 'd', 'r', 'l');
+ m_avi_hdr.avihhdr.fcc = FOURCC('a', 'v', 'i', 'h');
+ m_avi_hdr.avihhdr.cb = sizeof(m_avi_hdr.avih);
+ m_avi_hdr.avih.dwMicroSecPerFrame = m_msec_per_frame * 1000;
+ m_avi_hdr.avih.dwMaxBytesPerSec = 0; // update when finished
+ m_avi_hdr.avih.dwPaddingGranularity = 0;
+ m_avi_hdr.avih.dwFlags = AVIF_WASCAPTUREFILE | AVIF_HASINDEX;
+ m_avi_hdr.avih.dwTotalFrames = 0; // update when finished
+ m_avi_hdr.avih.dwInitialFrames = 0;
+ m_avi_hdr.avih.dwStreams = 1; // 1 = video, 2 = video and audio
+ m_avi_hdr.avih.dwSuggestedBufferSize = 0; // can be just 0
+ m_avi_hdr.avih.dwWidth = m_width;
+ m_avi_hdr.avih.dwHeight = m_height;
+
+ m_format_hdr.list.fcc = FOURCC('L', 'I', 'S', 'T');
+ m_format_hdr.list.cb = (sizeof(m_format_hdr) - 8) +
+ sizeof(BitmapInfoHeader);
+ m_format_hdr.strl = FOURCC('s', 't', 'r', 'l');
+ m_format_hdr.strhhdr.fcc = FOURCC('s', 't', 'r', 'h');
+ m_format_hdr.strhhdr.cb = sizeof(m_format_hdr.strh);
+ m_format_hdr.strh.fccType = FOURCC('v', 'i', 'd', 's');
+ m_format_hdr.strh.fccHandler = CC_DIB;
+ m_format_hdr.strh.dwFlags = 0;
+ m_format_hdr.strh.wPriority = 0;
+ m_format_hdr.strh.wLanguage = 0;
+ m_format_hdr.strh.dwInitialFrames = 0;
+ m_format_hdr.strh.dwScale = m_msec_per_frame; // dwRate / dwScale = 1000 / m_msec_per_frame
+ m_format_hdr.strh.dwRate = 1000;
+ m_format_hdr.strh.dwStart = 0;
+ m_format_hdr.strh.dwLength = 0; // update when finished
+ m_format_hdr.strh.dwSuggestedBufferSize = 0; // can be just 0
+ m_format_hdr.strh.dwQuality = m_img_quality * 100;
+ m_format_hdr.strh.dwSampleSize = 0;
+ m_format_hdr.strh.Left = 0;
+ m_format_hdr.strh.Top = 0;
+ m_format_hdr.strh.Right = m_avi_hdr.avih.dwWidth;
+ m_format_hdr.strh.Bottom = m_avi_hdr.avih.dwHeight;
+ m_format_hdr.strfhdr.fcc = FOURCC('s', 't', 'r', 'f');
+ m_format_hdr.strfhdr.cb = sizeof(BitmapInfoHeader);
+
+ // Format specific changes
+ if (m_avi_format == AVI_FORMAT_JPG)
+ {
+ m_format_hdr.strh.fccHandler = CC_MJPG;
+ bitmap_hdr.biCompression = FOURCC('M', 'J', 'P', 'G');
+ m_chunk_fcc = FOURCC('0', '0', 'd', 'c');
+ }
+ else if (m_avi_format == AVI_FORMAT_BMP)
+ {
+ bitmap_hdr.biHeight = -m_height; // stored in an unsigned field; presumably marks top-down row order - TODO confirm
+ bitmap_hdr.biCompression = 0;
+ m_chunk_fcc = FOURCC('0', '0', 'd', 'b');
+ }
+
+ const uint32_t fcc_movi = FOURCC('m', 'o', 'v', 'i');
+
+ m_file = fopen(m_filename.c_str(), "wb");
+ if (m_file == NULL)
+ return false;
+
+ int num = fwrite(&m_avi_hdr, 1, sizeof(m_avi_hdr), m_file);
+ if (num != sizeof(m_avi_hdr))
+ goto error;
+
+ num = fwrite(&m_format_hdr, 1, sizeof(m_format_hdr), m_file);
+ if (num != sizeof(m_format_hdr))
+ goto error;
+
+ num = fwrite(&bitmap_hdr, 1, sizeof(BitmapInfoHeader), m_file);
+ if (num != sizeof(BitmapInfoHeader))
+ goto error;
+
+ m_end_of_header = ftell(m_file);
+ if (m_end_of_header < 0)
+ goto error;
+
+ if (!addJUNKChunk("", 2840)) // addJUNKChunk() has already discarded the file when it fails
+ goto error;
+
+ m_avi_hdr.list1.cb = m_end_of_header - sizeof(m_avi_hdr.riff) -
+ sizeof(m_avi_hdr.avi) - sizeof(m_avi_hdr.list1); // only reaches the disk when closeFile() rewrites the header
+ m_movi_chunk.fcc = FOURCC('L', 'I', 'S', 'T');
+ m_movi_chunk.cb = 0; // update when finished
+
+ num = fwrite(&m_movi_chunk, 1, sizeof(m_movi_chunk), m_file);
+ if (num != sizeof(m_movi_chunk))
+ goto error;
+
+ m_movi_start = ftell(m_file); // position of the 'movi' four-character code written next
+ if (m_movi_start < 0)
+ goto error;
+
+ num = fwrite(&fcc_movi, 1, sizeof(fcc_movi), m_file);
+ if (num != sizeof(fcc_movi))
+ goto error;
+
+ return true;
+
+error:
+ closeFile(true/*delete_file*/); // returns early if the file has already been closed
+ return false;
+} // createFile
+
+// ----------------------------------------------------------------------------
+int AVIWriter::bmpToJpg(unsigned char* image_data, unsigned char* image_output, // Compresses m_width x m_height packed RGB pixels from image_data into image_output.
+ unsigned long buf_length) // passed to libjpeg as the size of image_output
+{
+ struct jpeg_compress_struct cinfo;
+ struct jpeg_error_mgr jerr;
+ cinfo.err = jpeg_std_error(&jerr);
+
+ jpeg_create_compress(&cinfo);
+
+ cinfo.image_width = m_width;
+ cinfo.image_height = m_height;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+
+ jpeg_set_defaults(&cinfo);
+ jpeg_set_quality(&cinfo, m_img_quality, true);
+
+ jpeg_mem_dest(&cinfo, &image_output, &buf_length); // NOTE(review): both arguments are in/out; if libjpeg ever replaced the buffer the caller would not see it - confirm the output always fits
+
+ jpeg_start_compress(&cinfo, true);
+
+ JSAMPROW jrow[1];
+ while (cinfo.next_scanline < cinfo.image_height) // feed the image one row at a time
+ {
+ jrow[0] = &image_data[cinfo.next_scanline * m_width * 3];
+ jpeg_write_scanlines(&cinfo, jrow, 1);
+ }
+
+ jpeg_finish_compress(&cinfo);
+ jpeg_destroy_compress(&cinfo);
+
+ return buf_length; // value left in buf_length by the memory destination
+} // bmpToJpg
+
+#endif
diff --git a/src/utils/avi_writer.hpp b/src/utils/avi_writer.hpp
new file mode 100644
index 000000000..66e455f89
--- /dev/null
+++ b/src/utils/avi_writer.hpp
@@ -0,0 +1,259 @@
+//
+// SuperTuxKart - a fun racing game with go-kart
+// Copyright (C) 2015 Dawid Gan
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#if !(defined(SERVER_ONLY) || defined(USE_GLES2))
+
+#include "graphics/gl_headers.hpp"
+#include "utils/can_be_deleted.hpp"
+#include "utils/no_copy.hpp"
+#include "utils/singleton.hpp"
+#include "utils/synchronised.hpp"
+
+#include
+#include
+
+#include
+
+#define FOURCC(a,b,c,d) ((uint32_t) (((d)<<24) | ((c)<<16) | ((b)<<8) | (a))) // packs four characters into 32 bits, first character in the lowest byte
+
+const uint32_t CC_MJPG = FOURCC('m', 'j', 'p', 'g'); // strh.fccHandler for JPEG frames
+const uint32_t CC_DIB = FOURCC('\0', '\0', '\0', '\0'); // default strh.fccHandler, kept for BMP frames
+const uint32_t CC_VIDS = FOURCC('v', 'i', 'd', 's');
+
+const uint32_t AVIF_HASINDEX = 0x00000010; // AVIF_*: bits for MainAVIHeader::dwFlags (createFile() sets HASINDEX and WASCAPTUREFILE)
+const uint32_t AVIF_MUSTUSEINDEX = 0x00000020;
+const uint32_t AVIF_ISINTERLEAVED = 0x00000100;
+const uint32_t AVIF_TRUSTCKTYPE = 0x00000800;
+const uint32_t AVIF_WASCAPTUREFILE = 0x00010000;
+const uint32_t AVIF_COPYRIGHTED = 0x00020000;
+
+const uint32_t AVISF_DISABLED = 0x00000001; // AVISF_*: presumably bits for AVIStreamHeader::dwFlags - not used by avi_writer.cpp
+const uint32_t AVISF_VIDEO_PALCHANGES = 0x00010000;
+
+const uint32_t AVIIF_LIST = 0x00000001; // AVIIF_*: bits for AVIINDEXENTRY::dwFlags (closeFile() sets KEYFRAME)
+const uint32_t AVIIF_KEYFRAME = 0x00000010;
+const uint32_t AVIIF_FIRSTPART = 0x00000020;
+const uint32_t AVIIF_LASTPART = 0x00000040;
+const uint32_t AVIIF_MIDPART = 0x00000060;
+const uint32_t AVIIF_NOTIME = 0x00000100;
+const uint32_t AVIIF_COMPUSE = 0x0FFF0000;
+
+enum AVIFormat // How frames are stored in the file; chosen in AVIWriter::resetCaptureFormat().
+{
+ AVI_FORMAT_BMP, // uncompressed 24-bit frames
+ AVI_FORMAT_JPG // frames compressed by AVIWriter::bmpToJpg()
+};
+
+enum AVIErrCode // Result of AVIWriter::addImage().
+{
+ AVI_SUCCESS, // the frame was appended
+ AVI_SIZE_LIMIT_ERR, // frame or file size limit reached; the file has been finalised and closed
+ AVI_IO_ERR // no open file or a file operation failed; the file has been discarded
+};
+
+const int MAX_FRAMES = 1000000; // capacity of AVIWriter::m_index_table, i.e. the most frames one file can hold
+const int MAX_FILE_SIZE = 2000000000; // file position (bytes) at which addImage() ends the current file
+
+struct MainAVIHeader // Payload of the 'avih' chunk; filled in by AVIWriter::createFile().
+{
+ uint32_t dwMicroSecPerFrame;
+ uint32_t dwMaxBytesPerSec; // final value is written by closeFile()
+ uint32_t dwPaddingGranularity;
+ uint32_t dwFlags; // combination of AVIF_* bits
+ uint32_t dwTotalFrames; // final value is written by closeFile()
+ uint32_t dwInitialFrames;
+ uint32_t dwStreams;
+ uint32_t dwSuggestedBufferSize;
+ uint32_t dwWidth;
+ uint32_t dwHeight;
+ uint32_t dwReserved[4];
+};
+
+struct AVIStreamHeader // Payload of the 'strh' chunk describing the single video stream.
+{
+ uint32_t fccType;
+ uint32_t fccHandler; // CC_DIB or CC_MJPG
+ uint32_t dwFlags;
+ uint16_t wPriority;
+ uint16_t wLanguage;
+ uint32_t dwInitialFrames;
+ uint32_t dwScale; // set to the frame period in milliseconds
+ uint32_t dwRate; // set to 1000
+ uint32_t dwStart;
+ uint32_t dwLength; // frame count, written by closeFile()
+ uint32_t dwSuggestedBufferSize;
+ uint32_t dwQuality;
+ uint32_t dwSampleSize;
+ uint16_t Left; // Left/Top/Right/Bottom: set to 0, 0, frame width, frame height
+ uint16_t Top;
+ uint16_t Right;
+ uint16_t Bottom;
+};
+
+struct BitmapInfoHeader // Payload of the 'strf' chunk; written to the file right after FormatHeader.
+{
+ uint32_t biSize; // set to sizeof(BitmapInfoHeader)
+ uint32_t biWidth;
+ uint32_t biHeight; // createFile() stores -m_height here for BMP frames
+ uint16_t biPlanes;
+ uint16_t biBitCount;
+ uint32_t biCompression; // 0, or the 'MJPG' code for JPEG frames
+ uint32_t biSizeImage;
+ uint32_t biXPelsPerMeter;
+ uint32_t biYPelsPerMeter;
+ uint32_t biClrUsed;
+ uint32_t biClrImportant;
+};
+
+struct AVIINDEXENTRY // One record of the 'idx1' chunk written by AVIWriter::closeFile().
+{
+ uint32_t ckid; // chunk id of the frame
+ uint32_t dwFlags; // AVIIF_* bits
+ uint32_t dwChunkOffset; // file position that addImage() recorded for the frame chunk
+ uint32_t dwChunkLength; // payload size of the frame chunk
+};
+
+struct CHUNK // Header written in front of every chunk or list.
+{
+ uint32_t fcc; // four-character code, see FOURCC()
+ uint32_t cb; // number of bytes that follow this header
+};
+
+struct AVIHeader // First block of the file, written as one unit by createFile() and rewritten by closeFile().
+{
+ CHUNK riff; // 'RIFF' header; cb becomes the file size minus this header
+ uint32_t avi; // 'AVI '
+ CHUNK list1; // 'LIST' header in front of 'hdrl'
+ uint32_t hdrl; // 'hdrl'
+ CHUNK avihhdr; // header of the 'avih' chunk
+ MainAVIHeader avih;
+};
+
+struct FormatHeader // Second block of the file, written directly after AVIHeader.
+{
+ CHUNK list; // 'LIST' header in front of 'strl'
+ uint32_t strl; // 'strl'
+ CHUNK strhhdr; // header of the 'strh' chunk
+ AVIStreamHeader strh;
+ CHUNK strfhdr; // header of the 'strf' chunk; its BitmapInfoHeader payload is written separately
+};
+
+struct IndexTable // In-memory record per written frame; closeFile() turns each one into an AVIINDEXENTRY.
+{
+ uint32_t Offset; // file position of the frame's chunk header
+ uint32_t Length; // payload size of the frame chunk
+ uint32_t fcc; // chunk id used for the frame
+};
+
+
+/** Captures the back buffer through rotating pixel buffer objects and lets a
+ *  worker thread write the images as BMP or JPEG frames of an AVI file. */
+class AVIWriter : public CanBeDeleted, public NoCopy,
+                  public Singleton<AVIWriter>
+{
+private:
+    /** File being written, NULL while no file is open. */
+    FILE* m_file;
+    /** Path prefix of new files; createFile() appends a timestamp. */
+    static Synchronised<std::string> m_recording_target;
+    /** Name of the file behind m_file. */
+    std::string m_filename;
+    /** File offsets, the JPEG quality and the frame size in pixels. */
+    int m_last_junk_chunk, m_end_of_header, m_movi_start, m_img_quality,
+        m_width, m_height;
+    /** Frame period (ms), bytes and frames written, capture call counter. */
+    unsigned int m_msec_per_frame, m_stream_bytes, m_total_frames, m_pbo_use;
+    /** Timing state of getFrameCount(). */
+    float m_accumulated_time, m_remaining_time;
+    /** Frame encoding selected by resetCaptureFormat(). */
+    AVIFormat m_avi_format;
+    /** Headers and 'movi' list chunk, rewritten by closeFile(). */
+    AVIHeader m_avi_hdr;
+
+    CHUNK m_movi_chunk;
+
+    FormatHeader m_format_hdr;
+    /** Offset, length and chunk id of every frame, for the 'idx1' chunk. */
+    IndexTable m_index_table[MAX_FRAMES];
+    /** Chunk id of frame chunks: '00dc' for JPEG, '00db' for BMP. */
+    uint32_t m_chunk_fcc;
+    /** Queued images with their repeat count, shared with the worker. */
+    Synchronised<std::list<std::pair<uint8_t*, int> > > m_fbi_queue;
+    /** Cleared by createFile(), set again once a stop request is handled. */
+    Synchronised<bool> m_idle;
+    /** Worker thread running startRoutine(). */
+    pthread_t m_thread;
+    /** Signalled whenever an entry is appended to m_fbi_queue. */
+    pthread_cond_t m_cond_request;
+    /** Pixel buffer objects used in rotation for the frame read-back. */
+    GLuint m_pbo[3];
+    /** JPEG-compresses a packed RGB image and returns the compressed size. */
+    int bmpToJpg(unsigned char* image_data, unsigned char* image_output,
+                 unsigned long buf_length);
+    /** Appends one frame chunk to the file and records it for the index. */
+    AVIErrCode addImage(unsigned char* buffer, int size);
+    /** Writes index and final headers, or deletes the file on failure. */
+    bool closeFile(bool delete_file = false, bool exiting = false);
+    /** Opens a new timestamped file and writes its provisional headers. */
+    bool createFile();
+    /** Appends a zero-padded JUNK chunk of at least min_size bytes. */
+    bool addJUNKChunk(std::string str, unsigned int min_size);
+    /** Queues an image (or a NULL marker) and wakes up the worker thread. */
+    void addFrameBufferImage(uint8_t* fbi, int frame_count)
+    {
+        m_fbi_queue.lock();
+        m_fbi_queue.getData().emplace_back(fbi, frame_count);
+        pthread_cond_signal(&m_cond_request);
+        m_fbi_queue.unlock();
+    }
+    /** Returns how many frames are due after another dt seconds. */
+    int getFrameCount(float dt);
+    /** Frees and drops every image that is still queued. */
+    void cleanAllFrameBufferImages()
+    {
+        m_fbi_queue.lock();
+        for (auto& p : m_fbi_queue.getData())
+            delete [] p.first;
+        m_fbi_queue.getData().clear();
+        m_fbi_queue.unlock();
+    }
+    /** True unless a file was created since the last handled stop request. */
+    bool isIdle() const { return m_idle.getAtomic(); }
+public:
+    /** Allocates the pixel buffer objects and starts the worker thread. */
+    AVIWriter();
+    /** Asks the worker thread to exit, joins it and frees the buffers. */
+    ~AVIWriter();
+    /** Entry point of the worker thread; obj is the AVIWriter to serve. */
+    static void* startRoutine(void *obj);
+    /** Sets the path prefix for files created from now on. */
+    static void setRecordingTarget(const std::string& name)
+    {
+        m_recording_target.setAtomic(name);
+    }
+    /** Queues the oldest read-back frame if due and reads the next one. */
+    void captureFrameBufferImage(float dt);
+    /** Restarts the buffer rotation and the frame timing. */
+    void resetFrameBufferImage();
+    /** Reloads quality, frame rate and format from the user config. */
+    void resetCaptureFormat();
+    /** Asks the worker thread to finish the current file. */
+    void stopRecording() { addFrameBufferImage(NULL, -1); }
+};
+
+#endif
diff --git a/src/utils/debug.cpp b/src/utils/debug.cpp
index 2f39ccf5d..2c0826139 100644
--- a/src/utils/debug.cpp
+++ b/src/utils/debug.cpp
@@ -134,6 +134,8 @@ enum DebugMenuCommand
DEBUG_SCRIPT_CONSOLE,
DEBUG_RUN_CUTSCENE,
DEBUG_TEXTURE_CONSOLE,
+ DEBUG_START_RECORDING,
+ DEBUG_STOP_RECORDING
}; // DebugMenuCommand
// -----------------------------------------------------------------------------
@@ -541,7 +543,6 @@ bool handleContextMenuAction(s32 cmd_id)
break;
case DEBUG_VISUAL_VALUES:
{
-#if !defined(__APPLE__)
DebugSliderDialog *dsd = new DebugSliderDialog();
dsd->setSliderHook("red_slider", 0, 255,
[](){ return int(irr_driver->getAmbientLight().r * 255.f); },
@@ -576,12 +577,10 @@ bool handleContextMenuAction(s32 cmd_id)
[](){ return int(irr_driver->getSSAOSigma() * 10.f); },
[](int v){irr_driver->setSSAOSigma(v / 10.f); }
);
-#endif
}
break;
case DEBUG_ADJUST_LIGHTS:
{
-#if !defined(__APPLE__)
// Some sliders use multipliers because the spinner widget
// only supports integers
DebugSliderDialog *dsd = new DebugSliderDialog();
@@ -635,7 +634,6 @@ bool handleContextMenuAction(s32 cmd_id)
[](int v){ findNearestLight()->setRadius(float(v)); }
);
dsd->changeLabel("SSAO Sigma", "[None]");
-#endif
break;
}
case DEBUG_SCRIPT_CONSOLE:
@@ -711,6 +709,12 @@ bool handleContextMenuAction(s32 cmd_id)
return false;
});
break;
+ case DEBUG_START_RECORDING:
+ irr_driver->setRecording(true);
+ break;
+ case DEBUG_STOP_RECORDING:
+ irr_driver->setRecording(false);
+ break;
} // switch
return false;
}
@@ -793,8 +797,13 @@ bool onEvent(const SEvent &event)
sub->addItem(L"Toggle smooth camera", DEBUG_GUI_CAM_SMOOTH);
sub->addItem(L"Attach fps camera to kart", DEBUG_GUI_CAM_ATTACH);
- mnu->addItem(L"Change camera target >",-1,true, true);
+ mnu->addItem(L"Recording >",-1,true, true);
sub = mnu->getSubMenu(4);
+ sub->addItem(L"Start recording", DEBUG_START_RECORDING);
+ sub->addItem(L"Stop recording", DEBUG_STOP_RECORDING);
+
+ mnu->addItem(L"Change camera target >",-1,true, true);
+ sub = mnu->getSubMenu(5);
sub->addItem(L"To kart one", DEBUG_VIEW_KART_ONE);
sub->addItem(L"To kart two", DEBUG_VIEW_KART_TWO);
sub->addItem(L"To kart three", DEBUG_VIEW_KART_THREE);
@@ -805,7 +814,7 @@ bool onEvent(const SEvent &event)
sub->addItem(L"To kart eight", DEBUG_VIEW_KART_EIGHT);
mnu->addItem(L"Font >",-1,true, true);
- sub = mnu->getSubMenu(5);
+ sub = mnu->getSubMenu(6);
sub->addItem(L"Dump glyph pages of fonts", DEBUG_FONT_DUMP_GLYPH_PAGE);
sub->addItem(L"Reload all fonts", DEBUG_FONT_RELOAD);
diff --git a/src/utils/vs.hpp b/src/utils/vs.hpp
index 503c93959..68c8b4763 100644
--- a/src/utils/vs.hpp
+++ b/src/utils/vs.hpp
@@ -40,6 +40,10 @@
# include
#endif
+#if defined(__linux__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__)
+# include
+#endif
+
namespace VS
{
#if defined(_MSC_VER) && defined(DEBUG)
@@ -77,6 +81,13 @@ namespace VS
{
}
+ } // setThreadName
+#elif defined(__linux__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__)
+ static void setThreadName(const char* name) // Linux/glibc variant: names the calling thread.
+ {
+#if __GLIBC__ > 2 || __GLIBC_MINOR__ > 11 // otherwise the body is empty and the call does nothing
+ pthread_setname_np(pthread_self(), name);
+#endif
} // setThreadName
#else
static void setThreadName(const char* name)