4099 lines
124 KiB
C
4099 lines
124 KiB
C
/* -----------------------------------------------------------------------------
|
|
*
|
|
* Copyright (c) 2014-2017 Alexis Naveros.
|
|
* Portions developed under contract to the SURVICE Engineering Company.
|
|
*
|
|
* This software is provided 'as-is', without any express or implied
|
|
* warranty. In no event will the authors be held liable for any damages
|
|
* arising from the use of this software.
|
|
*
|
|
* Permission is granted to anyone to use this software for any purpose,
|
|
* including commercial applications, and to alter it and redistribute it
|
|
* freely, subject to the following restrictions:
|
|
*
|
|
* 1. The origin of this software must not be misrepresented; you must not
|
|
* claim that you wrote the original software. If you use this software
|
|
* in a product, an acknowledgment in the product documentation would be
|
|
* appreciated but is not required.
|
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
* misrepresented as being the original software.
|
|
* 3. This notice may not be removed or altered from any source distribution.
|
|
*
|
|
* -----------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <float.h>
|
|
#include <limits.h>
|
|
|
|
|
|
#include "cpusimd.h"
|
|
|
|
#include "img.h"
|
|
#include "imgresize.h"
|
|
|
|
|
|
////
|
|
|
|
|
|
#define IM_RESIZE_DEBUG (0)
|
|
#define IM_RESIZE_DEBUG_PROGRESS (0)
|
|
|
|
|
|
////
|
|
|
|
|
|
#ifndef M_PI
|
|
#define M_PI (3.14159265358979323846)
|
|
#endif
|
|
|
|
#ifndef ADDRESS
|
|
#define ADDRESS(p,o) ((void *)(((char *)p)+(o)))
|
|
#endif
|
|
|
|
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
|
|
#define CC_ALWAYSINLINE __attribute__((always_inline))
|
|
#if __STDC_VERSION__ >= 199901L
|
|
#define CC_RESTRICT restrict
|
|
#else
|
|
#define CC_RESTRICT
|
|
#endif
|
|
#elif defined(_MSC_VER)
|
|
#define CC_ALWAYSINLINE __forceinline
|
|
#define CC_RESTRICT __restrict
|
|
#else
|
|
#define CC_ALWAYSINLINE
|
|
#define CC_RESTRICT
|
|
#endif
|
|
|
|
static inline CC_ALWAYSINLINE uint32_t ccIsPow2Int32( uint32_t v )
|
|
{
|
|
return ( ( v & ( v - 1 ) ) == 0 );
|
|
}
|
|
|
|
#define ROUND_POSITIVE_FLOAT(x) ((int)((x)+0.5f))
|
|
|
|
|
|
////
|
|
|
|
|
|
#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__i386__) || defined(__i386) || defined(i386) || defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86) || defined(_X86_)
|
|
|
|
/* Input is 0.0,255.0, output is 0.0,1.0 */
|
|
static inline CC_ALWAYSINLINE float srgb2linear( float v )
|
|
{
|
|
float v2, vpow, vpwsqrt;
|
|
union
|
|
{
|
|
int32_t i;
|
|
float f;
|
|
} u;
|
|
if( v <= (0.04045f*255.0f) )
|
|
v = v * ( (1.0f/12.92f)*(1.0f/255.0f) );
|
|
else
|
|
{
|
|
v = ( v + (0.055f*255.0f) ) * ( (1.0f/1.055f)*(1.0f/255.0f) );
|
|
v2 = v * v;
|
|
u.f = v * 5417434112.0f;
|
|
u.i = (int32_t)ROUND_POSITIVE_FLOAT( (float)u.i * 0.8f );
|
|
vpow = u.f;
|
|
vpwsqrt = sqrtf( vpow );
|
|
v = ( ( v2 * vpwsqrt ) + ( ( ( v2 * v ) / vpwsqrt ) / sqrtf( vpwsqrt ) ) ) * 0.51011878327f;
|
|
}
|
|
return v;
|
|
}
|
|
|
|
/* Input is 0.0,1.0, output is 0.0,255.0 */
|
|
static inline CC_ALWAYSINLINE float linear2srgb( float v )
|
|
{
|
|
float vpow;
|
|
union
|
|
{
|
|
int32_t i;
|
|
float f;
|
|
} u;
|
|
if( v <= 0.0031308f )
|
|
v = v * (12.92f*255.0f);
|
|
else
|
|
{
|
|
u.f = ( v * 6521909350804488192.0f );
|
|
u.i = (int32_t)ROUND_POSITIVE_FLOAT( (float)u.i * 0.666666666666f );
|
|
vpow = u.f;
|
|
v = ( v * vpow ) + ( ( v * v ) / sqrtf( vpow ) );
|
|
v = ( (1.055f*255.0f) * sqrtf( sqrtf( v * 0.5290553722f ) ) - (0.055f*255.0f) );
|
|
}
|
|
return v;
|
|
}
|
|
|
|
#else
|
|
|
|
/* Input is 0.0,255.0, output is 0.0,1.0 */
|
|
/* Only for reference, this is waayyy too slow and should never be used */
|
|
static inline CC_ALWAYSINLINE float srgb2linear( float v )
|
|
{
|
|
v *= (1.0f/255.0f);
|
|
if( v <= 0.04045f )
|
|
v = v * (1.0f/12.92);
|
|
else
|
|
v = powf( ( v + 0.055f ) * (1.0f/1.055f), 2.4f );
|
|
return v;
|
|
}
|
|
|
|
/* Input is 0.0,1.0, output is 0.0,255.0 */
|
|
/* Only for reference, this is waayyy too slow and should never be used */
|
|
static inline CC_ALWAYSINLINE float linear2srgb( float v )
|
|
{
|
|
if( v <= 0.0031308f )
|
|
v = v * 12.92f;
|
|
else
|
|
v = 1.055f * powf( v, 1.0f/2.4f ) - 0.055f;
|
|
return 255.0f * v;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static const float CPU_ALIGN16 srgbLinearConst00[4] = { 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f, 1024.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst01[4] = { (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), 1.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst02[4] = { 0.055f*255.0f, 0.055f*255.0f, 0.055f*255.0f, 0.055f*255.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst03[4] = { (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f), (1.0f/1.055f)*(1.0f/255.0f) };
|
|
static const float CPU_ALIGN16 srgbLinearConst04[4] = { 5417434112.0f, 5417434112.0f, 5417434112.0f, 5417434112.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst05[4] = { 0.8f, 0.8f, 0.8f, 0.8f };
|
|
static const float CPU_ALIGN16 srgbLinearConst06[4] = { 0.51011878327f, 0.51011878327f, 0.51011878327f, 0.51011878327f };
|
|
static const float CPU_ALIGN16 srgbLinearConst07[4] = { 0.0031308f, 0.0031308f, 0.0031308f, 1024.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst08[4] = { 12.92f*255.0f, 12.92f*255.0f, 12.92f*255.0f, 1.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst09[4] = { 6521909350804488192.0f, 6521909350804488192.0f, 6521909350804488192.0f, 6521909350804488192.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst10[4] = { 0.666666666666f, 0.666666666666f, 0.666666666666f, 0.666666666666f };
|
|
static const float CPU_ALIGN16 srgbLinearConst11[4] = { 0.5290553722f, 0.5290553722f, 0.5290553722f, 0.5290553722f };
|
|
static const float CPU_ALIGN16 srgbLinearConst12[4] = { 1.055f*255.0f, 1.055f*255.0f, 1.055f*255.0f, 1.055f*255.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst13[4] = { -0.055f*255.0f, -0.055f*255.0f, -0.055f*255.0f, -0.055f*255.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst14[4] = { 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f, 0.04045f*255.0f };
|
|
static const float CPU_ALIGN16 srgbLinearConst15[4] = { (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f), (1.0f/12.92f)*(1.0f/255.0f) };
|
|
|
|
/* Input is 0.0,255.0 ~ output is 0.0,1.0 ~ alpha channel is passed as-is */
|
|
static inline CC_ALWAYSINLINE __m128 srgb2linear3( __m128 vx )
|
|
{
|
|
__m128 vmask, vbase;
|
|
__m128 vpow, vpwsqrtinv, vpwsqrt, vx2;
|
|
vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst00 );
|
|
vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst01 );
|
|
vx = _mm_mul_ps( _mm_add_ps( vx, *(__m128*)srgbLinearConst02 ), *(__m128*)srgbLinearConst03 );
|
|
vx2 = _mm_mul_ps( vx, vx );
|
|
vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst04 ) ) ), *(__m128*)srgbLinearConst05 ) ) );
|
|
vpwsqrtinv = _mm_rsqrt_ps( vpow );
|
|
vpwsqrt = _mm_mul_ps( vpow, vpwsqrtinv );
|
|
vx = _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx2, vpwsqrt ), _mm_mul_ps( _mm_mul_ps( _mm_mul_ps( vx2, vx ), vpwsqrtinv ), _mm_rsqrt_ps( vpwsqrt ) ) ), *(__m128*)srgbLinearConst06 );
|
|
return CPU_BLENDV_PS( vx, vbase, vmask );
|
|
}
|
|
|
|
/* Input is 0.0,1.0 ~ output is 0.0,255.0 ~ alpha channel is passed as-is */
|
|
static inline CC_ALWAYSINLINE __m128 linear2srgb3( __m128 vx )
|
|
{
|
|
__m128 vmask, vbase, vpow;
|
|
vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst07 );
|
|
vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst08 );
|
|
vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst09 ) ) ), *(__m128*)srgbLinearConst10 ) ) );
|
|
vx = _mm_add_ps( _mm_mul_ps( _mm_sqrt_ps( _mm_sqrt_ps( _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx, vpow ), _mm_mul_ps( _mm_mul_ps( vx, vx ), _mm_rsqrt_ps( vpow ) ) ), *(__m128*)srgbLinearConst11 ) ) ), *(__m128*)srgbLinearConst12 ), *(__m128*)srgbLinearConst13 );
|
|
return CPU_BLENDV_PS( vx, vbase, vmask );
|
|
}
|
|
|
|
/* Input is 0.0,255.0 ~ output is 0.0,1.0 ~ alpha channel is passed as-is */
|
|
static inline CC_ALWAYSINLINE __m128 srgb2linear4( __m128 vx )
|
|
{
|
|
__m128 vmask, vbase;
|
|
__m128 vpow, vpwsqrtinv, vpwsqrt, vx2;
|
|
vmask = _mm_cmple_ps( vx, *(__m128*)srgbLinearConst14 );
|
|
vbase = _mm_mul_ps( vx, *(__m128*)srgbLinearConst15 );
|
|
vx = _mm_mul_ps( _mm_add_ps( vx, *(__m128*)srgbLinearConst02 ), *(__m128*)srgbLinearConst03 );
|
|
vx2 = _mm_mul_ps( vx, vx );
|
|
vpow = _mm_castsi128_ps( _mm_cvtps_epi32( _mm_mul_ps( _mm_cvtepi32_ps( _mm_castps_si128( _mm_mul_ps( vx, *(__m128*)srgbLinearConst04 ) ) ), *(__m128*)srgbLinearConst05 ) ) );
|
|
vpwsqrtinv = _mm_rsqrt_ps( vpow );
|
|
vpwsqrt = _mm_mul_ps( vpow, vpwsqrtinv );
|
|
vx = _mm_mul_ps( _mm_add_ps( _mm_mul_ps( vx2, vpwsqrt ), _mm_mul_ps( _mm_mul_ps( _mm_mul_ps( vx2, vx ), vpwsqrtinv ), _mm_rsqrt_ps( vpwsqrt ) ) ), *(__m128*)srgbLinearConst06 );
|
|
return CPU_BLENDV_PS( vx, vbase, vmask );
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
static inline CC_ALWAYSINLINE double bessel( double x )
|
|
{
|
|
double sum, t, y;
|
|
|
|
/* Zeroth order Bessel function of the first kind. */
|
|
sum = 1.0;
|
|
y = x * x * 0.25;
|
|
t = y;
|
|
sum += t;
|
|
t *= y * (1.0/(2.0*2.0));
|
|
sum += t;
|
|
t *= y * (1.0/(3.0*3.0));
|
|
sum += t;
|
|
t *= y * (1.0/(4.0*4.0));
|
|
sum += t;
|
|
t *= y * (1.0/(5.0*5.0));
|
|
sum += t;
|
|
t *= y * (1.0/(6.0*6.0));
|
|
sum += t;
|
|
t *= y * (1.0/(7.0*7.0));
|
|
sum += t;
|
|
t *= y * (1.0/(8.0*8.0));
|
|
sum += t;
|
|
t *= y * (1.0/(9.0*9.0));
|
|
sum += t;
|
|
t *= y * (1.0/(10.0*10.0));
|
|
sum += t;
|
|
t *= y * (1.0/(11.0*11.0));
|
|
sum += t;
|
|
t *= y * (1.0/(12.0*12.0));
|
|
sum += t;
|
|
t *= y * (1.0/(13.0*13.0));
|
|
sum += t;
|
|
t *= y * (1.0/(14.0*14.0));
|
|
sum += t;
|
|
|
|
return sum;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE double kaiser( double x, double beta )
|
|
{
|
|
return bessel( beta * sqrt( fmax( 0.0, 1.0 - ( x * x ) ) ) );
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE double sinc( double x )
|
|
{
|
|
if( x == 0.0 )
|
|
return 1.0;
|
|
x = sin( x * M_PI ) / ( x * M_PI );
|
|
return x;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static inline CC_ALWAYSINLINE __m128 simd4f_bessel( __m128 x )
|
|
{
|
|
__m128 sum, t, y;
|
|
|
|
sum = *(__m128 *)simd4fOne;
|
|
y = _mm_mul_ps( *(__m128 *)simd4fQuarter, _mm_mul_ps( x, x ) );
|
|
t = y;
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(2.0f*2.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(3.0f*3.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(4.0f*4.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(5.0f*5.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(6.0f*6.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(7.0f*7.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(8.0f*8.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(9.0f*9.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(10.0f*10.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(11.0f*11.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(12.0f*12.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(13.0f*13.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
t = _mm_mul_ps( t, _mm_mul_ps( y, _mm_set1_ps( 1.0f/(14.0f*14.0f) ) ) );
|
|
sum = _mm_add_ps( sum, t );
|
|
|
|
return sum;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE __m128 simd4f_kaiser( __m128 x, __m128 beta )
|
|
{
|
|
return simd4f_bessel( _mm_mul_ps( beta, _mm_sqrt_ps( _mm_max_ps( _mm_setzero_ps(), _mm_sub_ps( *(__m128 *)simd4fOne, _mm_mul_ps( x, x ) ) ) ) ) );
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE __m128 simd4f_sinc( __m128 x )
|
|
{
|
|
__m128 zeromask;
|
|
zeromask = _mm_cmpeq_ps( x, _mm_setzero_ps() );
|
|
x = _mm_mul_ps( x, _mm_load_ps( simd4fPi ) );
|
|
x = _mm_div_ps( simd4f_sin_ps( x ), x );
|
|
x = CPU_BLENDV_PS( x, *(__m128 *)simd4fOne, zeromask );
|
|
return x;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
typedef struct
|
|
{
|
|
int matrixsize;
|
|
int matrixoffset;
|
|
int matrixrowwidth;
|
|
int matrixrowsize;
|
|
int rowreturn;
|
|
float *matrix;
|
|
|
|
int minimumalpha;
|
|
float dithersum;
|
|
float minimumalphaf;
|
|
float amplifynormal;
|
|
float normalsustainfactor;
|
|
void *alloc;
|
|
|
|
unsigned char *srcdata;
|
|
int width1;
|
|
int width2;
|
|
int width3;
|
|
int width4;
|
|
int height;
|
|
int bytesperline;
|
|
} imStaticMatrixState;
|
|
|
|
|
|
static int imBuildStaticMatrix( imStaticMatrixState * CC_RESTRICT state, int sizedivisor, float hopcount, float alpha )
|
|
{
|
|
int i, j, minx, maxx;
|
|
double x, xshift, hopsize, offset, scalefactor, hopcountinv, beta, linsq, sum;
|
|
double *linear;
|
|
float suminv;
|
|
float *matrix;
|
|
|
|
if( alpha > 16.0f )
|
|
alpha = 16.0f;
|
|
beta = (double)alpha * (double)M_PI;
|
|
hopcountinv = 1.0 / (double)hopcount;
|
|
|
|
scalefactor = 1.0 / (double)sizedivisor;
|
|
hopsize = 0.5 * (double)sizedivisor;
|
|
offset = hopsize - 0.5;
|
|
minx = (int)ceil( ( (double)-hopcount * hopsize ) + offset );
|
|
maxx = (int)floor( ( (double)hopcount * hopsize ) + offset );
|
|
state->matrixoffset = minx;
|
|
state->matrixsize = ( maxx - minx ) + 1;
|
|
state->matrixrowwidth = ( state->matrixsize + 3 ) & ~3;
|
|
state->rowreturn = state->matrixrowwidth - state->matrixsize;
|
|
state->matrixrowsize = state->matrixrowwidth * sizeof(float);
|
|
|
|
#if IM_RESIZE_DEBUG
|
|
printf( "ResizeMatrix ; scalefactor %.3f, offset %.3f, hopsize %.3f\n", scalefactor, offset, hopsize );
|
|
#endif
|
|
|
|
linear = malloc( state->matrixrowwidth * sizeof(double) );
|
|
for( i = 0 ; i < state->matrixsize ; i++ )
|
|
{
|
|
x = (double)( i + state->matrixoffset );
|
|
xshift = 2.0 * scalefactor * ( x - offset );
|
|
linear[i] = sinc( xshift ) * kaiser( hopcountinv * xshift, beta );
|
|
#if IM_RESIZE_DEBUG
|
|
printf( " x[%+.3f] = %+.3f ( %+.3f * %+.3f )\n", x, linear[i], sinc( xshift ), kaiser( hopcountinv * xshift, beta ) );
|
|
#endif
|
|
}
|
|
for( ; i < state->matrixrowwidth ; i++ )
|
|
linear[i] = 0.0;
|
|
|
|
/* Build normalized state */
|
|
state->alloc = malloc( ( state->matrixsize * state->matrixrowsize ) + 16 );
|
|
state->matrix = (void *)( ( (uintptr_t)state->alloc + 0xf ) & ~0xf );
|
|
matrix = state->matrix;
|
|
sum = 0.0;
|
|
for( i = 0 ; i < state->matrixsize ; i++ )
|
|
{
|
|
for( j = 0 ; j < state->matrixsize ; j++ )
|
|
{
|
|
linsq = linear[i] * linear[j];
|
|
matrix[j] = (float)linsq;
|
|
sum += linsq;
|
|
}
|
|
for( ; j < state->matrixrowwidth ; j++ )
|
|
matrix[j] = 0.0f;
|
|
matrix += state->matrixrowwidth;
|
|
}
|
|
free( linear );
|
|
|
|
#if IM_RESIZE_DEBUG
|
|
printf( "Matrix sum : %f\n", sum );
|
|
#endif
|
|
|
|
suminv = (float)( 1.0 / sum );
|
|
j = state->matrixsize * state->matrixrowwidth;
|
|
for( i = 0 ; i < j ; i++ )
|
|
state->matrix[i] *= suminv;
|
|
|
|
#if IM_RESIZE_DEBUG
|
|
printf( "Matrix %dx%d :\n", state->matrixsize, state->matrixsize );
|
|
for( i = 0 ; i < state->matrixsize ; i++ )
|
|
{
|
|
for( j = 0 ; j < state->matrixsize ; j++ )
|
|
printf( " %+.6f", state->matrix[ ( i * state->matrixrowwidth ) + j ] );
|
|
printf( "\n" );
|
|
}
|
|
printf( "Matrix Offset : %d\n", state->matrixoffset );
|
|
printf( "Matrix Size : %d\n", state->matrixsize );
|
|
printf( "Matrix Rowwidth : %d\n", state->matrixrowwidth );
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
static void imFreeStaticState( imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
free( state->alloc );
|
|
state->alloc = 0;
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
typedef struct
|
|
{
|
|
int matrixsizex, matrixsizey;
|
|
int matrixoffsetx, matrixoffsety;
|
|
float *linearx;
|
|
float *lineary;
|
|
float beta;
|
|
float hopcountinv;
|
|
|
|
float dithersum;
|
|
int minimumalpha;
|
|
float minimumalphaf;
|
|
float amplifynormal;
|
|
float normalsustainfactor;
|
|
void *alloc;
|
|
|
|
unsigned char *srcdata;
|
|
int width1;
|
|
int width2;
|
|
int width3;
|
|
int width4;
|
|
int height;
|
|
int bytesperline;
|
|
} imGenericMatrixState;
|
|
|
|
|
|
static inline int imAllocGenericState( imGenericMatrixState *state, float scalex, float scaley, float hopcount, float alpha )
|
|
{
|
|
int allocx, allocy, size;
|
|
void *align;
|
|
if( alpha > 16.0f )
|
|
alpha = 16.0f;
|
|
allocx = ( (int)ceilf( hopcount / scalex ) + 2 + 3 ) & ~0x3;
|
|
allocy = ( (int)ceilf( hopcount / scaley ) + 2 + 3 ) & ~0x3;
|
|
size = ( ( allocx + allocy ) * sizeof(float) ) + 16;
|
|
state->alloc = malloc( size );
|
|
memset( state->alloc, 0, size );
|
|
align = (void *)( ( (uintptr_t)state->alloc + 0xf ) & ~0xf );
|
|
state->linearx = align;
|
|
state->lineary = ADDRESS( align, allocx * sizeof(float) );
|
|
state->beta = alpha * (float)M_PI;
|
|
state->hopcountinv = 1.0f / (float)hopcount;
|
|
return 1;
|
|
}
|
|
|
|
static inline void imBuildGenericLinearX( imGenericMatrixState *state, float scalex, float scaleinvx, float sourcex, float hopcount, float alpha, int width )
|
|
{
|
|
int i, minx, maxx;
|
|
float hopsizex, offsetx;
|
|
float *linearx;
|
|
|
|
hopsizex = 0.5f * scaleinvx;
|
|
offsetx = (float)sourcex;
|
|
minx = (int)ceil( ( -hopcount * hopsizex ) + offsetx );
|
|
maxx = (int)floor( ( hopcount * hopsizex ) + offsetx );
|
|
state->matrixsizex = ( maxx - minx ) + 1;
|
|
state->matrixoffsetx = ( minx + ( width << 8 ) ) % width;
|
|
|
|
linearx = state->linearx;
|
|
scalex *= 2.0f;
|
|
#if CPU_SSE2_SUPPORT
|
|
for( i = 0 ; i < state->matrixsizex ; i += 4 )
|
|
{
|
|
__m128 vx, vxshift;
|
|
vx = _mm_add_ps( _mm_set1_ps( (float)( i + minx ) ), _mm_load_ps( simd4fZeroOneTwoThree ) );
|
|
vxshift = _mm_mul_ps( _mm_set1_ps( scalex ), _mm_sub_ps( vx, _mm_set1_ps( offsetx ) ) );
|
|
_mm_store_ps( &linearx[i], _mm_mul_ps( simd4f_sinc( vxshift ), simd4f_kaiser( _mm_mul_ps( _mm_set1_ps( state->hopcountinv ), vxshift ), _mm_set1_ps( state->beta ) ) ) );
|
|
#if IM_RESIZE_DEBUG
|
|
printf( " linearx[%d] = %.3f\n", i+minx+0, linearx[i+0] );
|
|
printf( " linearx[%d] = %.3f\n", i+minx+1, linearx[i+1] );
|
|
printf( " linearx[%d] = %.3f\n", i+minx+2, linearx[i+2] );
|
|
printf( " linearx[%d] = %.3f\n", i+minx+3, linearx[i+3] );
|
|
#endif
|
|
}
|
|
#else
|
|
for( i = 0 ; i < state->matrixsizex ; i++ )
|
|
{
|
|
float x, xshift;
|
|
x = (float)( i + minx );
|
|
xshift = scalex * ( x - offsetx );
|
|
linearx[i] = (float)( sinc( xshift ) * kaiser( state->hopcountinv * xshift, state->beta ) );
|
|
#if IM_RESIZE_DEBUG
|
|
printf( " linearx[%+.3f] = %.3f ( %+.3f * %+.3f )\n", x, linearx[i], sinc( xshift ), kaiser( state->hopcountinv * xshift, state->beta ) );
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
static inline void imBuildGenericLinearY( imGenericMatrixState *state, float scaley, float scaleinvy, float sourcey, float hopcount, float alpha, int height )
|
|
{
|
|
int i, miny, maxy;
|
|
float hopsizey, offsety;
|
|
float *lineary;
|
|
|
|
hopsizey = 0.5f * scaleinvy;
|
|
offsety = (float)sourcey;
|
|
miny = (int)ceil( ( -hopcount * hopsizey ) + offsety );
|
|
maxy = (int)floor( ( hopcount * hopsizey ) + offsety );
|
|
state->matrixsizey = ( maxy - miny ) + 1;
|
|
state->matrixoffsety = ( miny + ( height << 8 ) ) % height;
|
|
|
|
lineary = state->lineary;
|
|
scaley *= 2.0f;
|
|
#if CPU_SSE2_SUPPORT
|
|
for( i = 0 ; i < state->matrixsizey ; i += 4 )
|
|
{
|
|
__m128 vy, vyshift;
|
|
vy = _mm_add_ps( _mm_set1_ps( (float)( i + miny ) ), _mm_load_ps( simd4fZeroOneTwoThree ) );
|
|
vyshift = _mm_mul_ps( _mm_set1_ps( scaley ), _mm_sub_ps( vy, _mm_set1_ps( offsety ) ) );
|
|
_mm_store_ps( &lineary[i], _mm_mul_ps( simd4f_sinc( vyshift ), simd4f_kaiser( _mm_mul_ps( _mm_set1_ps( state->hopcountinv ), vyshift ), _mm_set1_ps( state->beta ) ) ) );
|
|
#if IM_RESIZE_DEBUG
|
|
printf( " lineary[%d] = %.3f\n", i+miny+0, lineary[i+0] );
|
|
printf( " lineary[%d] = %.3f\n", i+miny+1, lineary[i+1] );
|
|
printf( " lineary[%d] = %.3f\n", i+miny+2, lineary[i+2] );
|
|
printf( " lineary[%d] = %.3f\n", i+miny+3, lineary[i+3] );
|
|
#endif
|
|
}
|
|
#else
|
|
for( i = 0 ; i < state->matrixsizey ; i++ )
|
|
{
|
|
float y, yshift;
|
|
y = (float)( i + miny );
|
|
yshift = scaley * ( y - offsety );
|
|
lineary[i] = (float)( sinc( yshift ) * kaiser( state->hopcountinv * yshift, state->beta ) );
|
|
#if IM_RESIZE_DEBUG
|
|
printf( " lineary[%+.3f] = %.3f ( %+.3f * %+.3f )\n", y, lineary[i], sinc( yshift ), kaiser( state->hopcountinv * yshift, state->beta ) );
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static inline void imFreeGenericState( imGenericMatrixState *state )
|
|
{
|
|
free( state->alloc );
|
|
state->alloc = 0;
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
static void imStaticKernel1Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
mapx++;
|
|
if( mapx >= state->width1 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel2Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 1;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
mapx += 2;
|
|
if( mapx >= state->width2 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel3Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx + ( pointx << 1 );
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel4Linear( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f, sum0, sum1, sum2, sum3;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
#endif
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
#else
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static void imStaticKernel4Linear_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
__m128 vsum, vf, v0, v1, v2, v3;
|
|
__m128i vzero;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x += 4 )
|
|
{
|
|
vf = _mm_load_ps( &matrix[x] );
|
|
v0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 0 ] ) ), vzero ) );
|
|
v1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 4 ] ) ), vzero ) );
|
|
v2 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 8 ] ) ), vzero ) );
|
|
v3 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx + 12 ] ) ), vzero ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x00 ), v0 ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x55 ), v1 ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xaa ), v2 ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xff ), v3 ) );
|
|
mapx += 16;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel4LinearAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2, sum3;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x] * (float)src[ mapx + 3 ];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
sum3 += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum0 * f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum1 * f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum2 * f ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static void imStaticKernel4LinearAlphaNorm_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
uint32_t pixel;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
__m128 vsum0, vsum1, vsum2, vsum3;
|
|
__m128 vf, valpha, vr, vg, vb, va, vsrcf;
|
|
__m128i vsrc, vshufmask;
|
|
__m128i vzero;
|
|
|
|
#if CPU_SSSE3_SUPPORT
|
|
vshufmask = _mm_setr_epi8( 0x00,0x04,0x08,0x0c, 0x01,0x05,0x09,0x0d, 0x02,0x06,0x0a,0x0e, 0x03,0x07,0x0b,0x0f );
|
|
#endif
|
|
vsum0 = _mm_setzero_ps();
|
|
vsum1 = _mm_setzero_ps();
|
|
vsum2 = _mm_setzero_ps();
|
|
vsum3 = _mm_setzero_ps();
|
|
vzero = _mm_castps_si128( _mm_setzero_ps() );
|
|
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x += 4 )
|
|
{
|
|
vf = _mm_load_ps( &matrix[x] );
|
|
/* Load 16 bytes and unpack as RRRR,GGGG,BBBB,AAAA in one SSE register */
|
|
vsrc = _mm_loadu_si128( (void *)&src[ mapx ] );
|
|
#if CPU_SSSE3_SUPPORT
|
|
vsrc = _mm_shuffle_epi8( vsrc, vshufmask );
|
|
#else
|
|
vshufmask = _mm_shuffle_epi32( vsrc, 0x39 );
|
|
vsrc = _mm_unpacklo_epi16( _mm_unpacklo_epi8( vsrc, vshufmask ), _mm_unpackhi_epi8( vsrc, vshufmask ) );
|
|
#endif
|
|
/* Break that into 4 SSE registers as floats: vR,vG,vB,vA */
|
|
vsrcf = _mm_castsi128_ps( vsrc );
|
|
vr = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( vsrcf ), vzero ) );
|
|
#if CPU_SSE3_SUPPORT
|
|
vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehdup_ps( vsrcf ) ), vzero ) );
|
|
#else
|
|
vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0x55 ) ), vzero ) );
|
|
#endif
|
|
vb = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehl_ps( vsrcf, vsrcf ) ), vzero ) );
|
|
va = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0xff ) ), vzero ) );
|
|
valpha = _mm_mul_ps( va, vf );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( vr, valpha ) );
|
|
vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( vg, valpha ) );
|
|
vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( vb, valpha ) );
|
|
vsum3 = _mm_add_ps( vsum3, valpha );
|
|
mapx += 16;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE3_SUPPORT
|
|
vsum0 = _mm_hadd_ps( vsum0, vsum1 );
|
|
vsum2 = _mm_hadd_ps( vsum2, vsum3 );
|
|
vsum0 = _mm_hadd_ps( vsum0, vsum2 );
|
|
#else
|
|
vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum2 ), _mm_unpackhi_ps( vsum0, vsum2 ) );
|
|
vsum1 = _mm_add_ps( _mm_unpacklo_ps( vsum1, vsum3 ), _mm_unpackhi_ps( vsum1, vsum3 ) );
|
|
vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum1 ), _mm_unpackhi_ps( vsum0, vsum1 ) );
|
|
#endif
|
|
|
|
valpha = _mm_shuffle_ps( vsum0, vsum0, 0xff );
|
|
pixel = 0;
|
|
if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
|
|
{
|
|
__m128i vpixel;
|
|
vsum0 = _mm_mul_ps( vsum0, _mm_rcp_ps( valpha ) );
|
|
vsum0 = CPU_BLENDV_PS( vsum0, valpha, *(__m128 *)simd4fAlphaMask );
|
|
vpixel = _mm_cvtps_epi32( vsum0 );
|
|
vpixel = _mm_packs_epi32( vpixel, vpixel );
|
|
vpixel = _mm_packus_epi16( vpixel, vpixel );
|
|
pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
|
|
}
|
|
*(uint32_t *)dst = pixel;
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel1sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f, sum0;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_set_ss( (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set_ss( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
#endif
|
|
mapx++;
|
|
if( mapx >= state->width1 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
dst[0] = _mm_cvtsi128_si32( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( linear2srgb3( vsum ) ), vzero ), vzero ) );
|
|
#else
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel2sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f, sum0, sum1;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 1;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_set_ps( 0.0f, 0.0f, (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
#endif
|
|
mapx += 2;
|
|
if( mapx >= state->width2 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
union
|
|
{
|
|
char c[4];
|
|
uint32_t i;
|
|
} u;
|
|
vsum = linear2srgb3( vsum );
|
|
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
dst[0] = u.c[0];
|
|
dst[1] = u.c[1];
|
|
#else
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel3sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f, sum0, sum1, sum2;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx + ( pointx << 1 );
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_set_ps( 0.0f, (float)src[ mapx + 2 ], (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
|
|
#endif
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
union
|
|
{
|
|
char c[4];
|
|
uint32_t i;
|
|
} u;
|
|
vsum = linear2srgb3( vsum );
|
|
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
dst[0] = u.c[0];
|
|
dst[1] = u.c[1];
|
|
dst[2] = u.c[2];
|
|
#else
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel4sRGB( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f, sum0, sum1, sum2, sum3;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
#endif
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = linear2srgb3( vsum );
|
|
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
#else
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static void imStaticKernel3sRGB_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
__m128 vsum0, vsum1, vsum2, vsrc0, vsrc1, vsrc2, vf;
|
|
__m128i vzero;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
union
|
|
{
|
|
char c[4];
|
|
uint32_t i;
|
|
} u;
|
|
|
|
vsum0 = _mm_setzero_ps();
|
|
vsum1 = _mm_setzero_ps();
|
|
vsum2 = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx + ( pointx << 1 );
|
|
for( x = 0 ; x < state->matrixsize ; x += 4 )
|
|
{
|
|
vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) );
|
|
vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) );
|
|
vsrc2 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+8 ] ) ), vzero ) );
|
|
vsrc0 = srgb2linear4( vsrc0 );
|
|
vsrc1 = srgb2linear4( vsrc1 );
|
|
vsrc2 = srgb2linear4( vsrc2 );
|
|
vf = _mm_load_ps( &matrix[x] );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0x40 ), vsrc0 ) );
|
|
vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xA5 ), vsrc1 ) );
|
|
vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( _mm_shuffle_ps( vf, vf, 0xFE ), vsrc2 ) );
|
|
mapx += 12;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSSE3_SUPPORT
|
|
vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum1 ), _mm_castps_si128( vsum0 ), 12 ) ) );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum2 ), _mm_castps_si128( vsum1 ), 8 ) ) );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_castsi128_ps( _mm_alignr_epi8( _mm_castps_si128( vsum2 ), _mm_castps_si128( vsum2 ), 4 ) ) );
|
|
#else
|
|
vf = _mm_shuffle_ps( vsum0, vsum1, 0x4f );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vf, vf, 0x38 ) );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vsum1, vsum2, 0x0E ) );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_shuffle_ps( vsum2, vsum2, 0x39 ) );
|
|
#endif
|
|
|
|
vsum0 = linear2srgb3( vsum0 );
|
|
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum0 ), vzero ), vzero ) ) );
|
|
dst[0] = u.c[0];
|
|
dst[1] = u.c[1];
|
|
dst[2] = u.c[2];
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel4sRGB_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
__m128 vsum, vsrc0, vsrc1;
|
|
__m128i vzero;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x += 2 )
|
|
{
|
|
vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) );
|
|
vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) );
|
|
vsrc0 = srgb2linear3( vsrc0 );
|
|
vsrc1 = srgb2linear3( vsrc1 );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x+0] ), vsrc0 ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x+1] ), vsrc1 ) );
|
|
mapx += 8;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
vsum = linear2srgb3( vsum );
|
|
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel4sRGBAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc, valpha;
|
|
__m128i vzero;
|
|
uint32_t pixel;
|
|
#else
|
|
float f, sum0, sum1, sum2, sum3;
|
|
#endif
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
valpha = _mm_shuffle_ps( vsrc, _mm_set_ss( 1.0f ), 0x0f );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha, valpha, 0xC0 ), _mm_set1_ps( matrix[x] ) ), vsrc ) );
|
|
#else
|
|
f = matrix[x] * (float)src[ mapx + 3 ];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
|
|
sum3 += f;
|
|
#endif
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
valpha = _mm_shuffle_ps( vsum, vsum, 0xff );
|
|
pixel = 0;
|
|
if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
|
|
{
|
|
__m128i vpixel;
|
|
vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha ) );
|
|
vsum = CPU_BLENDV_PS( vsum, valpha, *(__m128 *)simd4fAlphaMask );
|
|
vsum = linear2srgb3( vsum );
|
|
vpixel = _mm_cvtps_epi32( vsum );
|
|
vpixel = _mm_packs_epi32( vpixel, vpixel );
|
|
vpixel = _mm_packus_epi16( vpixel, vpixel );
|
|
pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
|
|
}
|
|
*(uint32_t *)dst = pixel;
|
|
#else
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * f ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static void imStaticKernel4sRGBAlphaNorm_Core( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
__m128 vsum, vsrc0, vsrc1, valpha0, valpha1;
|
|
__m128i vzero;
|
|
uint32_t pixel;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x += 2 )
|
|
{
|
|
vsrc0 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+0 ] ) ), vzero ) );
|
|
vsrc1 = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx+4 ] ) ), vzero ) );
|
|
valpha0 = _mm_shuffle_ps( vsrc0, _mm_set_ss( 1.0f ), 0x0f );
|
|
valpha1 = _mm_shuffle_ps( vsrc1, _mm_set_ss( 1.0f ), 0x0f );
|
|
vsrc0 = srgb2linear3( vsrc0 );
|
|
vsrc1 = srgb2linear3( vsrc1 );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha0, valpha0, 0xC0 ), _mm_set1_ps( matrix[x+0] ) ), vsrc0 ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha1, valpha1, 0xC0 ), _mm_set1_ps( matrix[x+1] ) ), vsrc1 ) );
|
|
mapx += 8;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
valpha0 = _mm_shuffle_ps( vsum, vsum, 0xff );
|
|
pixel = 0;
|
|
if( _mm_comige_ss( valpha0, _mm_load_ss( &state->minimumalphaf ) ) )
|
|
{
|
|
__m128i vpixel;
|
|
vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha0 ) );
|
|
vsum = CPU_BLENDV_PS( vsum, valpha0, *(__m128 *)simd4fAlphaMask );
|
|
vsum = linear2srgb3( vsum );
|
|
vpixel = _mm_cvtps_epi32( vsum );
|
|
vpixel = _mm_packs_epi32( vpixel, vpixel );
|
|
vpixel = _mm_packus_epi16( vpixel, vpixel );
|
|
pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
|
|
}
|
|
*(uint32_t *)dst = pixel;
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel3Normal( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx + ( pointx << 1 );
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
sum0 -= 0.5f*255.0f;
|
|
sum1 -= 0.5f*255.0f;
|
|
sum2 -= 0.5f*255.0f;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
|
|
sum0 = (0.5f*255.0f) + ( sum0 * suminv );
|
|
sum1 = (0.5f*255.0f) + ( sum1 * suminv );
|
|
sum2 = (0.5f*255.0f) + ( sum2 * suminv );
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel4Normal( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float f;
|
|
#endif
|
|
float sum0, sum1, sum2, sum3, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( matrix[x] ), vsrc ) );
|
|
#else
|
|
f = matrix[x];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
#endif
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_sub_ps( vsum, _mm_set_ps( 0.0f, 0.5f*255.0f, 0.5f*255.0f, 0.5f*255.0f ) );
|
|
sum0 = _mm_cvtss_f32( vsum );
|
|
#if CPU_SSE3_SUPPORT
|
|
sum1 = _mm_cvtss_f32( _mm_movehdup_ps( vsum ) );
|
|
#else
|
|
sum1 = _mm_cvtss_f32( _mm_shuffle_ps( vsum, vsum, 0x55 ) );
|
|
#endif
|
|
sum2 = _mm_cvtss_f32( _mm_movehl_ps( vsum, vsum ) );
|
|
sum3 = _mm_cvtss_f32( _mm_shuffle_ps( vsum, vsum, 0xff ) );
|
|
#else
|
|
sum0 -= 0.5f*255.0f;
|
|
sum1 -= 0.5f*255.0f;
|
|
sum2 -= 0.5f*255.0f;
|
|
#endif
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
|
|
sum0 = (0.5f*255.0f) + ( sum0 * suminv );
|
|
sum1 = (0.5f*255.0f) + ( sum1 * suminv );
|
|
sum2 = (0.5f*255.0f) + ( sum2 * suminv );
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel3NormalSustain( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, v0, v1, v2, energy, sum0, sum1, sum2, sumenergy, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx + ( pointx << 1 );
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
v0 = f * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = f * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = f * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernel4NormalSustain( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, v0, v1, v2, v3, energy, sum0, sum1, sum2, sum3, sumenergy, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x];
|
|
v0 = f * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = f * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = f * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
v3 = f * (float)src[ mapx + 3 ];
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
sum3 += v3;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernel4NormalSustainAlphaNorm( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, v0, v1, v2, v3, energy, sum0, sum1, sum2, sum3, sumenergy, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrix = state->matrix;
|
|
mapy = pointy;
|
|
for( y = 0 ; y < state->matrixsize ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = pointx << 2;
|
|
for( x = 0 ; x < state->matrixsize ; x++ )
|
|
{
|
|
f = matrix[x] * (float)src[ mapx + 3 ];
|
|
v0 = f * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = f * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = f * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
v3 = f;
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
sum3 += v3;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
matrix = ADDRESS( matrix, state->matrixrowsize );
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
sum2 *= f;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernelPoT3Water( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy, heightmask, widthmask;
|
|
int minx, maxx, miny, maxy;
|
|
float f, sum0, sum1, sum2, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
minx = pointx;
|
|
maxx = minx + state->matrixsize;
|
|
miny = pointy;
|
|
maxy = miny + state->matrixsize;
|
|
heightmask = state->height - 1;
|
|
widthmask = state->width1 - 1;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
matrix = state->matrix;
|
|
for( y = miny ; y < maxy ; y++ )
|
|
{
|
|
mapy = y & heightmask;
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
for( x = minx ; x < maxx ; x++, matrix++ )
|
|
{
|
|
mapx = x & widthmask;
|
|
mapx += mapx << 1;
|
|
f = *matrix;
|
|
sum0 += (float)src[ mapx + 0 ] * f;
|
|
sum1 += (float)src[ mapx + 1 ] * f;
|
|
sum2 += (float)src[ mapx + 2 ] * f;
|
|
}
|
|
matrix += state->rowreturn;
|
|
}
|
|
|
|
sum0 *= 1.0f/255.0f;
|
|
sum1 *= 1.0f/255.0f;
|
|
sum2 *= 1.0f/255.0f;
|
|
sum0 = 2.0f * ( sum0 - 0.5f );
|
|
sum1 = 2.0f * ( sum1 - 0.5f );
|
|
suminv = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
if( suminv < 0.75f )
|
|
{
|
|
suminv = 0.5f / suminv;
|
|
sum0 = 0.5f + ( sum0 * suminv );
|
|
sum1 = 0.5f + ( sum1 * suminv );
|
|
}
|
|
if( sum2 > 0.1f )
|
|
{
|
|
state->dithersum += sum2;
|
|
if( sum2 > 0.45f )
|
|
sum2 = 1.0f;
|
|
else if( ( sum2 < 0.3f ) && ( state->dithersum < 1.0f ) )
|
|
sum2 = 0.0f;
|
|
else
|
|
sum2 = ( ( sum2 + state->dithersum ) < 0.45f ? 0.0f : 1.0f );
|
|
state->dithersum -= sum2;
|
|
}
|
|
sum0 *= 255.0f;
|
|
sum1 *= 255.0f;
|
|
sum2 *= 255.0f;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imStaticKernelPoT4Water( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy, heightmask, widthmask;
|
|
int minx, maxx, miny, maxy;
|
|
float f, sum0, sum1, sum2, sum3, suminv;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
minx = pointx;
|
|
maxx = minx + state->matrixsize;
|
|
miny = pointy;
|
|
maxy = miny + state->matrixsize;
|
|
heightmask = state->height - 1;
|
|
widthmask = state->width1 - 1;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrix = state->matrix;
|
|
for( y = miny ; y < maxy ; y++ )
|
|
{
|
|
mapy = y & heightmask;
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
for( x = minx ; x < maxx ; x++, matrix++ )
|
|
{
|
|
mapx = x & widthmask;
|
|
mapx <<= 2;
|
|
f = *matrix;
|
|
sum0 += (float)src[ mapx + 0 ] * f;
|
|
sum1 += (float)src[ mapx + 1 ] * f;
|
|
sum2 += (float)src[ mapx + 2 ] * f;
|
|
sum3 += (float)src[ mapx + 3 ] * f;
|
|
}
|
|
matrix += state->rowreturn;
|
|
}
|
|
|
|
sum0 *= 1.0f/255.0f;
|
|
sum1 *= 1.0f/255.0f;
|
|
sum2 *= 1.0f/255.0f;
|
|
sum0 = 2.0f * ( sum0 - 0.5f );
|
|
sum1 = 2.0f * ( sum1 - 0.5f );
|
|
suminv = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
if( suminv < 0.75f )
|
|
{
|
|
suminv = 0.5f / suminv;
|
|
sum0 = 0.5f + ( sum0 * suminv );
|
|
sum1 = 0.5f + ( sum1 * suminv );
|
|
}
|
|
if( sum2 > 0.1f )
|
|
{
|
|
state->dithersum += sum2;
|
|
if( sum2 > 0.45f )
|
|
sum2 = 1.0f;
|
|
else if( ( sum2 < 0.3f ) && ( state->dithersum < 1.0f ) )
|
|
sum2 = 0.0f;
|
|
else
|
|
sum2 = ( ( sum2 + state->dithersum ) < 0.45f ? 0.0f : 1.0f );
|
|
state->dithersum -= sum2;
|
|
}
|
|
sum0 *= 255.0f;
|
|
sum1 *= 255.0f;
|
|
sum2 *= 255.0f;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imStaticKernelPoT4Plant( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state )
|
|
{
|
|
int x, y, mapx, mapy, heightmask, widthmask;
|
|
int minx, maxx, miny, maxy;
|
|
float f, sum0, sum1, sum2, sum3;
|
|
float *matrix;
|
|
unsigned char *src;
|
|
|
|
minx = pointx;
|
|
maxx = minx + state->matrixsize;
|
|
miny = pointy;
|
|
maxy = miny + state->matrixsize;
|
|
heightmask = state->height - 1;
|
|
widthmask = state->width1 - 1;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrix = state->matrix;
|
|
for( y = miny ; y < maxy ; y++ )
|
|
{
|
|
mapy = y & heightmask;
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
for( x = minx ; x < maxx ; x++, matrix++ )
|
|
{
|
|
mapx = x & widthmask;
|
|
mapx <<= 2;
|
|
f = *matrix;
|
|
sum0 += (float)src[ mapx + 0 ] * f;
|
|
sum1 += (float)src[ mapx + 1 ] * f;
|
|
sum2 += (float)src[ mapx + 2 ] * f;
|
|
sum3 += (float)src[ mapx + 3 ] * f;
|
|
}
|
|
matrix += state->rowreturn;
|
|
}
|
|
|
|
sum3 *= 1.25f;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
int imReduceImageKaiserDataDivisor( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int sizedivisor, imReduceOptions *options )
|
|
{
|
|
int filter, x, y, pointx, pointy, basex, basey, pow2flag;
|
|
int newwidth, newheight;
|
|
unsigned char *dst;
|
|
imStaticMatrixState state;
|
|
void (*applykernel)( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state );
|
|
#if CPU_SSE2_SUPPORT
|
|
int corebase, corerange;
|
|
void (*applykernelcore)( unsigned char *dst, int pointx, int pointy, imStaticMatrixState * CC_RESTRICT state );
|
|
#endif
|
|
|
|
filter = options->filter;
|
|
imBuildStaticMatrix( &state, sizedivisor, options->hopcount, options->alpha );
|
|
|
|
newwidth = ( width < sizedivisor ) ? 1 : ( ( width + sizedivisor - 1 ) / sizedivisor );
|
|
newheight = ( height < sizedivisor ) ? 1 : ( ( height + sizedivisor - 1 ) / sizedivisor );
|
|
|
|
pow2flag = ccIsPow2Int32( width ) && ccIsPow2Int32( height );
|
|
applykernel = 0;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = 0;
|
|
#endif
|
|
|
|
if( filter == IM_REDUCE_FILTER_LINEAR )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
{
|
|
applykernel = imStaticKernel4Linear;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imStaticKernel4Linear_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3Linear;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imStaticKernel2Linear;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imStaticKernel1Linear;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
{
|
|
applykernel = imStaticKernel4LinearAlphaNorm;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imStaticKernel4LinearAlphaNorm_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3Linear;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imStaticKernel2Linear;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imStaticKernel1Linear;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_SRGB )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
{
|
|
applykernel = imStaticKernel4sRGB;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imStaticKernel4sRGB_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 3 )
|
|
{
|
|
applykernel = imStaticKernel3sRGB;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imStaticKernel3sRGB_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imStaticKernel2sRGB;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imStaticKernel1sRGB;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
{
|
|
applykernel = imStaticKernel4sRGBAlphaNorm;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imStaticKernel4sRGBAlphaNorm_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3sRGB;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imStaticKernel2sRGB;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imStaticKernel1sRGB;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imStaticKernel4Normal;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3Normal;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imStaticKernel4NormalSustain;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3NormalSustain;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imStaticKernel4NormalSustainAlphaNorm;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imStaticKernel3NormalSustain;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_WATERMAP )
|
|
{
|
|
if( ( bytesperpixel == 4 ) && ( pow2flag ) )
|
|
applykernel = imStaticKernelPoT4Water;
|
|
else if( ( bytesperpixel == 3 ) && ( pow2flag ) )
|
|
applykernel = imStaticKernelPoT3Water;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_PLANTMAP )
|
|
{
|
|
if( ( bytesperpixel == 4 ) && ( pow2flag ) )
|
|
applykernel = imStaticKernelPoT4Plant;
|
|
}
|
|
|
|
if( !applykernel )
|
|
return 0;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
corebase = -state.matrixoffset;
|
|
corerange = ( newwidth + state.matrixoffset ) - corebase;
|
|
#endif
|
|
|
|
state.dithersum = 0.0f;
|
|
if( ( newwidth | newheight ) > 2 )
|
|
state.dithersum = 0.5f;
|
|
|
|
state.srcdata = srcdata;
|
|
state.width1 = width * 1;
|
|
state.width2 = width * 2;
|
|
state.width3 = width * 3;
|
|
state.width4 = width * 4;
|
|
state.height = height;
|
|
state.bytesperline = bytesperline;
|
|
|
|
state.minimumalpha = 4;
|
|
state.minimumalphaf = (float)state.minimumalpha;
|
|
state.amplifynormal = fmaxf( 1.0f, options->amplifynormal );
|
|
state.normalsustainfactor = options->normalsustainfactor;
|
|
|
|
basex = ( state.matrixoffset + ( width << 8 ) ) % width;
|
|
basey = ( state.matrixoffset + ( height << 8 ) ) % height;
|
|
while( basex < 0 )
|
|
basex += width;
|
|
while( basey < 0 )
|
|
basey += height;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
if( applykernelcore )
|
|
{
|
|
dst = dstdata;
|
|
pointy = basey;
|
|
for( y = 0 ; y < newheight ; y++ )
|
|
{
|
|
pointx = basex;
|
|
for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
|
|
{
|
|
( (unsigned int)( x - corebase ) < corerange ? applykernelcore : applykernel )( dst, pointx, pointy, &state );
|
|
pointx += sizedivisor;
|
|
while( pointx >= width )
|
|
pointx -= width;
|
|
}
|
|
pointy += sizedivisor;
|
|
while( pointy >= height )
|
|
pointy -= height;
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
dst = dstdata;
|
|
pointy = basey;
|
|
for( y = 0 ; y < newheight ; y++ )
|
|
{
|
|
pointx = basex;
|
|
for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
|
|
{
|
|
applykernel( dst, pointx, pointy, &state );
|
|
pointx += sizedivisor;
|
|
while( pointx >= width )
|
|
pointx -= width;
|
|
}
|
|
pointy += sizedivisor;
|
|
while( pointy >= height )
|
|
pointy -= height;
|
|
}
|
|
}
|
|
|
|
imFreeStaticState( &state );
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
int imReduceImageKaiserDivisor( imgImage *imgdst, imgImage *imgsrc, int sizedivisor, imReduceOptions *options )
|
|
{
|
|
int width, height;
|
|
int newwidth, newheight, retvalue;
|
|
|
|
width = imgsrc->format.width;
|
|
height = imgsrc->format.height;
|
|
newwidth = ( width < sizedivisor ) ? 1 : ( ( width + sizedivisor - 1 ) / sizedivisor );
|
|
newheight = ( height < sizedivisor ) ? 1 : ( ( height + sizedivisor - 1 ) / sizedivisor );
|
|
|
|
imgdst->format.width = newwidth;
|
|
imgdst->format.height = newheight;
|
|
imgdst->format.type = imgsrc->format.type;
|
|
imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
|
|
imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
|
|
imgdst->data = malloc( imgdst->format.height * imgdst->format.bytesperline );
|
|
if( !( imgdst->data ) )
|
|
return 0;
|
|
|
|
retvalue = imReduceImageKaiserDataDivisor( imgdst->data, imgsrc->data, width, height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, sizedivisor, options );
|
|
|
|
return retvalue;
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
static void imDynamicKernel1Linear( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0;
|
|
float matrixsum;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
matrixsum += f;
|
|
mapx++;
|
|
if( mapx >= state->width1 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
sum0 /= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel2Linear( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1;
|
|
float matrixsum;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 1;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
matrixsum += f;
|
|
mapx += 2;
|
|
if( mapx >= state->width2 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel3Linear( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2;
|
|
float matrixsum;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
matrixsum += f;
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel4Linear( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2, sum3;
|
|
float matrixsum;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel4LinearAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2, sum3, alpha;
|
|
float matrixsum;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
alpha = (float)src[ mapx + 3 ] * f;
|
|
sum0 += alpha * (float)src[ mapx + 0 ];
|
|
sum1 += alpha * (float)src[ mapx + 1 ];
|
|
sum2 += alpha * (float)src[ mapx + 2 ];
|
|
sum3 += alpha;
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum0 * f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum1 * f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, ( sum2 * f ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
|
|
static void imDynamicKernel4LinearAlphaNorm_Core( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
uint32_t pixel;
|
|
unsigned char *src;
|
|
__m128 vmatrixsum, vsum0, vsum1, vsum2, vsum3;
|
|
__m128 vlx, vly, vf, valpha, vr, vg, vb, va, vsrcf;
|
|
__m128i vsrc, vshufmask;
|
|
__m128i vzero;
|
|
|
|
#if CPU_SSSE3_SUPPORT
|
|
vshufmask = _mm_setr_epi8( 0x00,0x04,0x08,0x0c, 0x01,0x05,0x09,0x0d, 0x02,0x06,0x0a,0x0e, 0x03,0x07,0x0b,0x0f );
|
|
#endif
|
|
vsum0 = _mm_setzero_ps();
|
|
vsum1 = _mm_setzero_ps();
|
|
vsum2 = _mm_setzero_ps();
|
|
vsum3 = _mm_setzero_ps();
|
|
vmatrixsum = _mm_setzero_ps();
|
|
vzero = _mm_castps_si128( _mm_setzero_ps() );
|
|
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
vly = _mm_set1_ps( state->lineary[y] );
|
|
for( x = 0 ; x < state->matrixsizex ; x += 4 )
|
|
{
|
|
vlx = _mm_load_ps( &state->linearx[x] );
|
|
/* Load 16 bytes and unpack as RRRR,GGGG,BBBB,AAAA in one SSE register */
|
|
vsrc = _mm_loadu_si128( (void *)&src[ mapx ] );
|
|
#if CPU_SSSE3_SUPPORT
|
|
vsrc = _mm_shuffle_epi8( vsrc, vshufmask );
|
|
#else
|
|
vshufmask = _mm_shuffle_epi32( vsrc, 0x39 );
|
|
vsrc = _mm_unpacklo_epi16( _mm_unpacklo_epi8( vsrc, vshufmask ), _mm_unpackhi_epi8( vsrc, vshufmask ) );
|
|
#endif
|
|
/* Break that into 4 SSE registers as floats: vR,vG,vB,vA */
|
|
vsrcf = _mm_castsi128_ps( vsrc );
|
|
vr = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( vsrcf ), vzero ) );
|
|
#if CPU_SSE3_SUPPORT
|
|
vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehdup_ps( vsrcf ) ), vzero ) );
|
|
#else
|
|
vg = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0x55 ) ), vzero ) );
|
|
#endif
|
|
vb = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_movehl_ps( vsrcf, vsrcf ) ), vzero ) );
|
|
va = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_shuffle_ps( vsrcf, vsrcf, 0xff ) ), vzero ) );
|
|
vf = _mm_mul_ps( vlx, vly );
|
|
valpha = _mm_mul_ps( va, vf );
|
|
vsum0 = _mm_add_ps( vsum0, _mm_mul_ps( vr, valpha ) );
|
|
vsum1 = _mm_add_ps( vsum1, _mm_mul_ps( vg, valpha ) );
|
|
vsum2 = _mm_add_ps( vsum2, _mm_mul_ps( vb, valpha ) );
|
|
vsum3 = _mm_add_ps( vsum3, valpha );
|
|
vmatrixsum = _mm_add_ps( vmatrixsum, vf );
|
|
mapx += 16;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE3_SUPPORT
|
|
vmatrixsum = _mm_hadd_ps( vmatrixsum, vmatrixsum );
|
|
vmatrixsum = _mm_hadd_ps( vmatrixsum, vmatrixsum );
|
|
#else
|
|
vmatrixsum = _mm_add_ps( vmatrixsum, _mm_shuffle_ps( vmatrixsum, vmatrixsum, 0x4e ) );
|
|
vmatrixsum = _mm_add_ps( vmatrixsum, _mm_shuffle_ps( vmatrixsum, vmatrixsum, 0x39 ) );
|
|
#endif
|
|
|
|
#if CPU_SSE3_SUPPORT
|
|
vsum0 = _mm_hadd_ps( vsum0, vsum1 );
|
|
vsum2 = _mm_hadd_ps( vsum2, vsum3 );
|
|
vsum0 = _mm_hadd_ps( vsum0, vsum2 );
|
|
#else
|
|
vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum2 ), _mm_unpackhi_ps( vsum0, vsum2 ) );
|
|
vsum1 = _mm_add_ps( _mm_unpacklo_ps( vsum1, vsum3 ), _mm_unpackhi_ps( vsum1, vsum3 ) );
|
|
vsum0 = _mm_add_ps( _mm_unpacklo_ps( vsum0, vsum1 ), _mm_unpackhi_ps( vsum0, vsum1 ) );
|
|
#endif
|
|
vsum0 = _mm_div_ps( vsum0, vmatrixsum );
|
|
|
|
valpha = _mm_shuffle_ps( vsum0, vsum0, 0xff );
|
|
pixel = 0;
|
|
if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
|
|
{
|
|
__m128i vpixel;
|
|
vsum0 = _mm_mul_ps( vsum0, _mm_rcp_ps( valpha ) );
|
|
vsum0 = CPU_BLENDV_PS( vsum0, valpha, *(__m128 *)simd4fAlphaMask );
|
|
vpixel = _mm_cvtps_epi32( vsum0 );
|
|
vpixel = _mm_packs_epi32( vpixel, vpixel );
|
|
vpixel = _mm_packus_epi16( vpixel, vpixel );
|
|
pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
|
|
}
|
|
*(uint32_t *)dst = pixel;
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel1sRGB( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float sum0;
|
|
#endif
|
|
float f, matrixsum;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
#endif
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
f = state->linearx[x] * state->lineary[y];
|
|
vsrc = _mm_set_ss( (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
|
|
#else
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
#endif
|
|
matrixsum += f;
|
|
mapx++;
|
|
if( mapx >= state->width1 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
|
dst[0] = _mm_cvtsi128_si32( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) );
|
|
#else
|
|
sum0 /= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel2sRGB( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float sum0, sum1;
|
|
#endif
|
|
float f, matrixsum;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
#endif
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 1;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
f = state->linearx[x] * state->lineary[y];
|
|
vsrc = _mm_set_ps( 0.0f, 0.0f, (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
|
|
#else
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
#endif
|
|
matrixsum += f;
|
|
mapx += 2;
|
|
if( mapx >= state->width2 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
union
|
|
{
|
|
char c[4];
|
|
uint32_t i;
|
|
} u;
|
|
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
|
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
dst[0] = u.c[0];
|
|
dst[1] = u.c[1];
|
|
#else
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel3sRGB( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float sum0, sum1, sum2;
|
|
#endif
|
|
float f, matrixsum;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
#endif
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
f = state->linearx[x] * state->lineary[y];
|
|
vsrc = _mm_set_ps( 0.0f, (float)src[ mapx + 2 ], (float)src[ mapx + 1 ], (float)src[ mapx + 0 ] );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
|
|
#else
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
|
|
#endif
|
|
matrixsum += f;
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
union
|
|
{
|
|
char c[4];
|
|
uint32_t i;
|
|
} u;
|
|
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
|
_mm_store_ss( (float *)&u.i, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
dst[0] = u.c[0];
|
|
dst[1] = u.c[1];
|
|
dst[2] = u.c[2];
|
|
#else
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel4sRGB( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc;
|
|
__m128i vzero;
|
|
#else
|
|
float sum0, sum1, sum2, sum3;
|
|
#endif
|
|
float f, matrixsum;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
f = state->linearx[x] * state->lineary[y];
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_set1_ps( f ), vsrc ) );
|
|
#else
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += f * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += f * srgb2linear( (float)src[ mapx + 2 ] );
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
#endif
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = linear2srgb3( _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) ) );
|
|
_mm_store_ss( (float *)dst, _mm_castsi128_ps( _mm_packus_epi16( _mm_packs_epi32( _mm_cvtps_epi32( vsum ), vzero ), vzero ) ) );
|
|
#else
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel4sRGBAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
#if CPU_SSE2_SUPPORT
|
|
__m128 vsum, vsrc, valpha;
|
|
__m128i vzero;
|
|
uint32_t pixel;
|
|
#else
|
|
float sum0, sum1, sum2, sum3, alpha;
|
|
#endif
|
|
float f, matrixsum;
|
|
unsigned char *src;
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_setzero_ps();
|
|
vzero = _mm_setzero_si128();
|
|
#else
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
#endif
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
#if CPU_SSE2_SUPPORT
|
|
f = state->linearx[x] * state->lineary[y];
|
|
vsrc = _mm_cvtepi32_ps( CPU_CVT_U8_TO_I32( _mm_castps_si128( _mm_load_ss( (void *)&src[ mapx ] ) ), vzero ) );
|
|
valpha = _mm_shuffle_ps( vsrc, _mm_set_ss( 1.0f ), 0x0f );
|
|
vsrc = srgb2linear3( vsrc );
|
|
vsum = _mm_add_ps( vsum, _mm_mul_ps( _mm_mul_ps( _mm_shuffle_ps( valpha, valpha, 0xC0 ), _mm_set1_ps( f ) ), vsrc ) );
|
|
#else
|
|
f = state->linearx[x] * state->lineary[y];
|
|
alpha = (float)src[ mapx + 3 ] * f;
|
|
sum0 += alpha * srgb2linear( (float)src[ mapx + 0 ] );
|
|
sum1 += alpha * srgb2linear( (float)src[ mapx + 1 ] );
|
|
sum2 += alpha * srgb2linear( (float)src[ mapx + 2 ] );
|
|
sum3 += alpha;
|
|
#endif
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
vsum = _mm_div_ps( vsum, _mm_set1_ps( matrixsum ) );
|
|
valpha = _mm_shuffle_ps( vsum, vsum, 0xff );
|
|
pixel = 0;
|
|
if( _mm_comige_ss( valpha, _mm_load_ss( &state->minimumalphaf ) ) )
|
|
{
|
|
__m128i vpixel;
|
|
vsum = _mm_mul_ps( vsum, _mm_rcp_ps( valpha ) );
|
|
vsum = CPU_BLENDV_PS( vsum, valpha, *(__m128 *)simd4fAlphaMask );
|
|
vsum = linear2srgb3( vsum );
|
|
vpixel = _mm_cvtps_epi32( vsum );
|
|
vpixel = _mm_packs_epi32( vpixel, vpixel );
|
|
vpixel = _mm_packus_epi16( vpixel, vpixel );
|
|
pixel = (uint32_t)_mm_cvtsi128_si32( vpixel );
|
|
}
|
|
*(uint32_t *)dst = pixel;
|
|
#else
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * f ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel3Normal( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2;
|
|
float matrixsum, suminv;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
matrixsum += f;
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = (1.0f/255.0f) / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum0 -= 0.5f;
|
|
sum1 -= 0.5f;
|
|
sum2 -= 0.5f;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
|
|
sum0 = (0.5f*255.0f) + ( sum0 * suminv );
|
|
sum1 = (0.5f*255.0f) + ( sum1 * suminv );
|
|
sum2 = (0.5f*255.0f) + ( sum2 * suminv );
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void imDynamicKernel4Normal( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, sum0, sum1, sum2, sum3;
|
|
float matrixsum, suminv;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
sum0 += f * (float)src[ mapx + 0 ];
|
|
sum1 += f * (float)src[ mapx + 1 ];
|
|
sum2 += f * (float)src[ mapx + 2 ];
|
|
sum3 += f * (float)src[ mapx + 3 ];
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = (1.0f/255.0f) / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
sum0 -= 0.5f;
|
|
sum1 -= 0.5f;
|
|
sum2 -= 0.5f;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) );
|
|
sum0 = (0.5f*255.0f) + ( sum0 * suminv );
|
|
sum1 = (0.5f*255.0f) + ( sum1 * suminv );
|
|
sum2 = (0.5f*255.0f) + ( sum2 * suminv );
|
|
sum3 *= 255.0f;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel3NormalSustain( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, v0, v1, v2, energy, sum0, sum1, sum2, sumenergy;
|
|
float matrixsum, suminv;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx + ( state->matrixoffsetx << 1 );
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
v0 = f * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = f * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = f * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
matrixsum += f;
|
|
mapx += 3;
|
|
if( mapx >= state->width3 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = (1.0f/255.0f) / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static void imDynamicKernel4NormalSustain( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, v0, v1, v2, v3, energy, sum0, sum1, sum2, sum3, sumenergy;
|
|
float matrixsum, suminv;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
v0 = f * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = f * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = f * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
v3 = f * (float)src[ mapx + 3 ];
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
sum3 += v3;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = (1.0f/255.0f) / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
sum3 *= 255.0f;
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
static void imDynamicKernel4NormalSustainAlphaNorm( unsigned char *dst, imGenericMatrixState *state )
|
|
{
|
|
int x, y, mapx, mapy;
|
|
float f, alpha, v0, v1, v2, v3, energy, sum0, sum1, sum2, sum3, sumenergy;
|
|
float matrixsum, suminv;
|
|
unsigned char *src;
|
|
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 0.0f;
|
|
sumenergy = 0.0f;
|
|
matrixsum = 0.0f;
|
|
mapy = state->matrixoffsety;
|
|
for( y = 0 ; y < state->matrixsizey ; y++ )
|
|
{
|
|
src = ADDRESS( state->srcdata, ( mapy * state->bytesperline ) );
|
|
mapx = state->matrixoffsetx << 2;
|
|
for( x = 0 ; x < state->matrixsizex ; x++ )
|
|
{
|
|
f = state->linearx[x] * state->lineary[y];
|
|
alpha = (float)src[ mapx + 3 ] * f;
|
|
v0 = alpha * ( (float)src[ mapx + 0 ] - 127.5f );
|
|
v1 = alpha * ( (float)src[ mapx + 1 ] - 127.5f );
|
|
v2 = alpha * ( (float)src[ mapx + 2 ] - 127.5f );
|
|
v3 = alpha;
|
|
sum0 += v0;
|
|
sum1 += v1;
|
|
sum2 += v2;
|
|
sum3 += v3;
|
|
energy = ( v0 * v0 ) + ( v1 * v1 );
|
|
if( energy )
|
|
sumenergy += sqrtf( energy ) / sqrtf( energy + ( v2 * v2 ) );
|
|
matrixsum += f;
|
|
mapx += 4;
|
|
if( mapx >= state->width4 )
|
|
mapx = 0;
|
|
}
|
|
mapy++;
|
|
if( mapy >= state->height )
|
|
mapy = 0;
|
|
}
|
|
|
|
matrixsum = 1.0f / matrixsum;
|
|
sum0 *= matrixsum;
|
|
sum1 *= matrixsum;
|
|
sum2 *= matrixsum;
|
|
sum3 *= matrixsum;
|
|
if( sum3 >= state->minimumalphaf )
|
|
{
|
|
f = 1.0f / sum3;
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
sum2 *= f;
|
|
sum0 *= state->amplifynormal;
|
|
sum1 *= state->amplifynormal;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
energy = sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) );
|
|
sumenergy *= state->normalsustainfactor;
|
|
if( energy < sumenergy )
|
|
{
|
|
f = fminf( sumenergy / energy, 8.0f );
|
|
sum0 *= f;
|
|
sum1 *= f;
|
|
suminv = (0.5f*255.0f) / fmaxf( 0.0625f, sqrtf( ( sum0 * sum0 ) + ( sum1 * sum1 ) + ( sum2 * sum2 ) ) );
|
|
sum0 *= suminv;
|
|
sum1 *= suminv;
|
|
sum2 *= suminv;
|
|
}
|
|
sum0 += (0.5f*255.0f);
|
|
sum1 += (0.5f*255.0f);
|
|
sum2 += (0.5f*255.0f);
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum0 + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum1 + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, sum3 + 0.5f ) ) );
|
|
}
|
|
else
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
int imReduceImageKaiserData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, int newwidth, int newheight, imReduceOptions *options )
|
|
{
|
|
int filter, x, y;
|
|
float scalex, scaley, scaleinvx, scaleinvy;
|
|
float sourcex, sourcey;
|
|
unsigned char *dst;
|
|
imGenericMatrixState state;
|
|
void (*applykernel)( unsigned char *dst, imGenericMatrixState *state );
|
|
#if CPU_SSE2_SUPPORT
|
|
void (*applykernelcore)( unsigned char *dst, imGenericMatrixState *state );
|
|
#endif
|
|
|
|
filter = options->filter;
|
|
if( ( newwidth > width ) || ( newheight > height ) )
|
|
return 0;
|
|
|
|
applykernel = 0;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = 0;
|
|
#endif
|
|
|
|
if( filter == IM_REDUCE_FILTER_LINEAR )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4Linear;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3Linear;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imDynamicKernel2Linear;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imDynamicKernel1Linear;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
{
|
|
applykernel = imDynamicKernel4LinearAlphaNorm;
|
|
#if CPU_SSE2_SUPPORT
|
|
applykernelcore = imDynamicKernel4LinearAlphaNorm_Core;
|
|
#endif
|
|
}
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3Linear;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imDynamicKernel2Linear;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imDynamicKernel1Linear;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_SRGB )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4sRGB;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3sRGB;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imDynamicKernel2sRGB;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imDynamicKernel1sRGB;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4sRGBAlphaNorm;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3sRGB;
|
|
else if( bytesperpixel == 2 )
|
|
applykernel = imDynamicKernel2sRGB;
|
|
else if( bytesperpixel == 1 )
|
|
applykernel = imDynamicKernel1sRGB;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4Normal;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3Normal;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4NormalSustain;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3NormalSustain;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
applykernel = imDynamicKernel4NormalSustainAlphaNorm;
|
|
else if( bytesperpixel == 3 )
|
|
applykernel = imDynamicKernel3NormalSustain;
|
|
}
|
|
|
|
if( !applykernel )
|
|
return 0;
|
|
|
|
state.minimumalpha = 4;
|
|
state.minimumalphaf = (float)state.minimumalpha;
|
|
state.amplifynormal = fmaxf( 1.0f, options->amplifynormal );
|
|
state.normalsustainfactor = options->normalsustainfactor;
|
|
|
|
state.dithersum = 0.0f;
|
|
if( ( newwidth | newheight ) > 2 )
|
|
state.dithersum = 0.5f;
|
|
|
|
state.srcdata = srcdata;
|
|
state.width1 = width * 1;
|
|
state.width2 = width * 2;
|
|
state.width3 = width * 3;
|
|
state.width4 = width * 4;
|
|
state.height = height;
|
|
state.bytesperline = bytesperline;
|
|
|
|
scalex = (float)newwidth / (float)width;
|
|
scaley = (float)newheight / (float)height;
|
|
scaleinvx = (float)width / (float)newwidth;
|
|
scaleinvy = (float)height / (float)newheight;
|
|
|
|
imAllocGenericState( &state, scalex, scaley, options->hopcount, options->alpha );
|
|
|
|
#if CPU_SSE2_SUPPORT
|
|
if( applykernelcore )
|
|
{
|
|
dst = dstdata;
|
|
for( y = 0 ; y < newheight ; y++ )
|
|
{
|
|
sourcey = ( ( (float)y + 0.5f ) * scaleinvy ) - 0.5f;
|
|
imBuildGenericLinearY( &state, scaley, scaleinvy, sourcey, options->hopcount, options->alpha, height );
|
|
for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
|
|
{
|
|
sourcex = ( ( (float)x + 0.5f ) * scaleinvx ) - 0.5f;
|
|
imBuildGenericLinearX( &state, scalex, scaleinvx, sourcex, options->hopcount, options->alpha, width );
|
|
if( ( state.matrixoffsetx + ( ( state.matrixsizex + 3 ) & ~3 ) ) < width )
|
|
applykernelcore( dst, &state );
|
|
else
|
|
applykernel( dst, &state );
|
|
}
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
dst = dstdata;
|
|
for( y = 0 ; y < newheight ; y++ )
|
|
{
|
|
sourcey = ( ( (float)y + 0.5f ) * scaleinvy ) - 0.5f;
|
|
imBuildGenericLinearY( &state, scaley, scaleinvy, sourcey, options->hopcount, options->alpha, height );
|
|
for( x = 0 ; x < newwidth ; x++, dst += bytesperpixel )
|
|
{
|
|
sourcex = ( ( (float)x + 0.5f ) * scaleinvx ) - 0.5f;
|
|
imBuildGenericLinearX( &state, scalex, scaleinvx, sourcex, options->hopcount, options->alpha, width );
|
|
applykernel( dst, &state );
|
|
}
|
|
}
|
|
}
|
|
|
|
imFreeGenericState( &state );
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
int imReduceImageKaiser( imgImage *imgdst, imgImage *imgsrc, int newwidth, int newheight, imReduceOptions *options )
|
|
{
|
|
int retvalue;
|
|
|
|
imgdst->format.width = newwidth;
|
|
imgdst->format.height = newheight;
|
|
imgdst->format.type = imgsrc->format.type;
|
|
imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
|
|
imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
|
|
imgdst->data = malloc( imgdst->format.height * imgdst->format.bytesperline );
|
|
if( !( imgdst->data ) )
|
|
return 0;
|
|
|
|
retvalue = imReduceImageKaiserData( imgdst->data, imgsrc->data, imgsrc->format.width, imgsrc->format.height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, newwidth, newheight, options );
|
|
|
|
return retvalue;
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox1Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
dst[0] = (unsigned char)( ( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] + 2 ) >> 2 );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox2Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
dst[0] = (unsigned char)( ( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] + 2 ) >> 2 );
|
|
dst[1] = (unsigned char)( ( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] + 2 ) >> 2 );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox3Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
dst[0] = (unsigned char)( ( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] + 2 ) >> 2 );
|
|
dst[1] = (unsigned char)( ( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] + 2 ) >> 2 );
|
|
dst[2] = (unsigned char)( ( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] + 2 ) >> 2 );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox4Linear( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
dst[0] = (unsigned char)( ( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] + 2 ) >> 2 );
|
|
dst[1] = (unsigned char)( ( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] + 2 ) >> 2 );
|
|
dst[2] = (unsigned char)( ( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] + 2 ) >> 2 );
|
|
dst[3] = (unsigned char)( ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] + 2 ) >> 2 );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox1sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
int i, offset[4];
|
|
float sum0;
|
|
offset[0] = 0;
|
|
offset[1] = bytesperpixel;
|
|
offset[2] = bytesperline;
|
|
offset[3] = bytesperline + bytesperpixel;
|
|
sum0 = 0.0f;
|
|
for( i = 0 ; i < 4 ; i++ )
|
|
sum0 += srgb2linear( (float)src[offset[i]+0] );
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * 0.25f ) + 0.5f ) ) );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox2sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
int i, offset[4];
|
|
float sum0, sum1;
|
|
offset[0] = 0;
|
|
offset[1] = bytesperpixel;
|
|
offset[2] = bytesperline;
|
|
offset[3] = bytesperline + bytesperpixel;
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
for( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
sum0 += srgb2linear( (float)src[offset[i]+0] );
|
|
sum1 += srgb2linear( (float)src[offset[i]+1] );
|
|
}
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * 0.25f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * 0.25f ) + 0.5f ) ) );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox3sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
int i, offset[4];
|
|
float sum0, sum1, sum2;
|
|
offset[0] = 0;
|
|
offset[1] = bytesperpixel;
|
|
offset[2] = bytesperline;
|
|
offset[3] = bytesperline + bytesperpixel;
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
for( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
sum0 += srgb2linear( (float)src[offset[i]+0] );
|
|
sum1 += srgb2linear( (float)src[offset[i]+1] );
|
|
sum2 += srgb2linear( (float)src[offset[i]+2] );
|
|
}
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * 0.25f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * 0.25f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * 0.25f ) + 0.5f ) ) );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox4sRGB( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
int i, offset[4], sum3;
|
|
float sum0, sum1, sum2;
|
|
offset[0] = 0;
|
|
offset[1] = bytesperpixel;
|
|
offset[2] = bytesperline;
|
|
offset[3] = bytesperline + bytesperpixel;
|
|
sum0 = 0.0f;
|
|
sum1 = 0.0f;
|
|
sum2 = 0.0f;
|
|
sum3 = 2;
|
|
for( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
sum0 += srgb2linear( (float)src[offset[i]+0] );
|
|
sum1 += srgb2linear( (float)src[offset[i]+1] );
|
|
sum2 += srgb2linear( (float)src[offset[i]+2] );
|
|
sum3 += (int)src[offset[i]+2];
|
|
}
|
|
dst[0] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum0 * 0.25f ) + 0.5f ) ) );
|
|
dst[1] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum1 * 0.25f ) + 0.5f ) ) );
|
|
dst[2] = (unsigned char)( fmaxf( 0.0f, fminf( 255.0f, linear2srgb( sum2 * 0.25f ) + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( sum3 >> 2 );
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox3Normal( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
float v0, v1, v2, suminv;
|
|
|
|
v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
|
|
v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
|
|
v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
|
|
v0 = 2.0f * ( v0 - 0.5f );
|
|
v1 = 2.0f * ( v1 - 0.5f );
|
|
v2 = 2.0f * ( v2 - 0.5f );
|
|
suminv = 0.5f / sqrtf( ( v0 * v0 ) + ( v1 * v1 ) + ( v2 * v2 ) );
|
|
v0 = 0.5f + ( v0 * suminv );
|
|
v1 = 0.5f + ( v1 * suminv );
|
|
v2 = 0.5f + ( v2 * suminv );
|
|
dst[0] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v0 );
|
|
dst[1] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v1 );
|
|
dst[2] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v2 );
|
|
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox4Normal( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
float v0, v1, v2, suminv;
|
|
|
|
v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
|
|
v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
|
|
v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
|
|
v0 = 2.0f * ( v0 - 0.5f );
|
|
v1 = 2.0f * ( v1 - 0.5f );
|
|
v2 = 2.0f * ( v2 - 0.5f );
|
|
suminv = 0.5f / sqrtf( ( v0 * v0 ) + ( v1 * v1 ) + ( v2 * v2 ) );
|
|
v0 = 0.5f + ( v0 * suminv );
|
|
v1 = 0.5f + ( v1 * suminv );
|
|
v2 = 0.5f + ( v2 * suminv );
|
|
dst[0] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v0 );
|
|
dst[1] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v1 );
|
|
dst[2] = (unsigned char)ROUND_POSITIVE_FLOAT( 255.0f * v2 );
|
|
dst[3] = (unsigned char)( ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] + 2 ) >> 2 );
|
|
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox3Water( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
float v0, v1, v2, suminv;
|
|
|
|
v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
|
|
v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
|
|
v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
|
|
|
|
v0 = 2.0f * ( v0 - 0.5f );
|
|
v1 = 2.0f * ( v1 - 0.5f );
|
|
suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) );
|
|
if( suminv < 0.75f )
|
|
{
|
|
suminv = 0.5f / suminv;
|
|
v0 = 0.5f + ( v0 * suminv );
|
|
v1 = 0.5f + ( v1 * suminv );
|
|
}
|
|
if( v2 > 0.1f )
|
|
{
|
|
*dithersum += v2;
|
|
if( v2 > 0.45f )
|
|
v2 = 1.0f;
|
|
else if( ( v2 < 0.3f ) && ( *dithersum < 1.0f ) )
|
|
v2 = 0.0f;
|
|
else
|
|
v2 = ( ( v2 + *dithersum ) < 0.45f ? 0.0f : 1.0f );
|
|
*dithersum -= v2;
|
|
}
|
|
v0 *= 255.0f;
|
|
v1 *= 255.0f;
|
|
v2 *= 255.0f;
|
|
|
|
dst[0] = (int)( fmaxf( 0.0f, fminf( 255.0f, v0 + 0.5f ) ) );
|
|
dst[1] = (int)( fmaxf( 0.0f, fminf( 255.0f, v1 + 0.5f ) ) );
|
|
dst[2] = (int)( fmaxf( 0.0f, fminf( 255.0f, v2 + 0.5f ) ) );
|
|
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox4Water( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
float v0, v1, v2, suminv;
|
|
|
|
v0 = (1.0f/1020.0f) * (float)( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] );
|
|
v1 = (1.0f/1020.0f) * (float)( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] );
|
|
v2 = (1.0f/1020.0f) * (float)( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] );
|
|
|
|
v0 = 2.0f * ( v0 - 0.5f );
|
|
v1 = 2.0f * ( v1 - 0.5f );
|
|
suminv = sqrtf( ( v0 * v0 ) + ( v1 * v1 ) );
|
|
if( suminv < 0.75f )
|
|
{
|
|
suminv = 0.5f / suminv;
|
|
v0 = 0.5f + ( v0 * suminv );
|
|
v1 = 0.5f + ( v1 * suminv );
|
|
}
|
|
if( v2 > 0.1f )
|
|
{
|
|
*dithersum += v2;
|
|
if( v2 > 0.45f )
|
|
v2 = 1.0f;
|
|
else if( ( v2 < 0.3f ) && ( *dithersum < 1.0f ) )
|
|
v2 = 0.0f;
|
|
else
|
|
v2 = ( ( v2 + *dithersum ) < 0.45f ? 0.0f : 1.0f );
|
|
*dithersum -= v2;
|
|
}
|
|
v0 *= 255.0f;
|
|
v1 *= 255.0f;
|
|
v2 *= 255.0f;
|
|
|
|
dst[0] = (int)( fmaxf( 0.0f, fminf( 255.0f, v0 + 0.5f ) ) );
|
|
dst[1] = (int)( fmaxf( 0.0f, fminf( 255.0f, v1 + 0.5f ) ) );
|
|
dst[2] = (int)( fmaxf( 0.0f, fminf( 255.0f, v2 + 0.5f ) ) );
|
|
dst[3] = (unsigned char)( ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] + 2 ) >> 2 );
|
|
|
|
return;
|
|
}
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceHalfBox4Plant( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum )
|
|
{
|
|
int alpha;
|
|
|
|
dst[0] = (unsigned char)( ( (int)src[0] + (int)src[bytesperpixel+0] + (int)src[bytesperline+0] + (int)src[bytesperpixel+bytesperline+0] + 2 ) >> 2 );
|
|
dst[1] = (unsigned char)( ( (int)src[1] + (int)src[bytesperpixel+1] + (int)src[bytesperline+1] + (int)src[bytesperpixel+bytesperline+1] + 2 ) >> 2 );
|
|
dst[2] = (unsigned char)( ( (int)src[2] + (int)src[bytesperpixel+2] + (int)src[bytesperline+2] + (int)src[bytesperpixel+bytesperline+2] + 2 ) >> 2 );
|
|
|
|
alpha = ( (int)src[3] + (int)src[bytesperpixel+3] + (int)src[bytesperline+3] + (int)src[bytesperpixel+bytesperline+3] );
|
|
alpha += alpha >> 2;
|
|
alpha = ( alpha + 2 ) >> 2;
|
|
if( alpha > 255 )
|
|
alpha = 255;
|
|
dst[3] = (unsigned char)alpha;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static inline CC_ALWAYSINLINE void imReduceImageHalfBoxWork( unsigned char *dst, unsigned char *src, int width, int height, int bytesperpixel, int bytesperline, void (*work)( unsigned char *dst, unsigned char *src, int bytesperpixel, int bytesperline, float *dithersum ) )
|
|
{
|
|
int x, y, newwidth, newheight, rowoffset;
|
|
float dithersum;
|
|
newwidth = ( width < 2 ) ? 1 : ( ( width + 1 ) / 2 );
|
|
newheight = ( height < 2 ) ? 1 : ( ( height + 1 ) / 2 );
|
|
rowoffset = bytesperline + ( bytesperpixel * ( width - ( newwidth << 1 ) ) );
|
|
dithersum = 0.0f;
|
|
if( ( newwidth | newheight ) > 2 )
|
|
dithersum = 0.5f;
|
|
for( y = 0 ; y < newheight ; y++ )
|
|
{
|
|
for( x = 0 ; x < newwidth ; x++, src += bytesperpixel, dst += bytesperpixel )
|
|
{
|
|
work( dst, src, bytesperpixel, bytesperline, &dithersum );
|
|
src += bytesperpixel;
|
|
}
|
|
src += rowoffset;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
int imReduceImageHalfBoxData( unsigned char *dstdata, unsigned char *srcdata, int width, int height, int bytesperpixel, int bytesperline, imReduceOptions *options )
|
|
{
|
|
int filter, retval;
|
|
|
|
filter = options->filter;
|
|
retval = 1;
|
|
if( ( filter == IM_REDUCE_FILTER_LINEAR ) || ( filter == IM_REDUCE_FILTER_LINEAR_ALPHANORM ) )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Linear );
|
|
else if( bytesperpixel == 3 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Linear );
|
|
else if( bytesperpixel == 2 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox2Linear );
|
|
else if( bytesperpixel == 1 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox1Linear );
|
|
else
|
|
retval = 0;
|
|
}
|
|
else if( ( filter == IM_REDUCE_FILTER_SRGB ) || ( filter == IM_REDUCE_FILTER_SRGB_ALPHANORM ) )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4sRGB );
|
|
else if( bytesperpixel == 3 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3sRGB );
|
|
else if( bytesperpixel == 2 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox2sRGB );
|
|
else if( bytesperpixel == 1 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox1sRGB );
|
|
else
|
|
retval = 0;
|
|
}
|
|
else if( ( filter == IM_REDUCE_FILTER_NORMALMAP ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_ALPHANORM ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN ) || ( filter == IM_REDUCE_FILTER_NORMALMAP_SUSTAIN_ALPHANORM ) )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Normal );
|
|
else if( bytesperpixel == 3 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Normal );
|
|
else
|
|
retval = 0;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_WATERMAP )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Water );
|
|
else if( bytesperpixel == 3 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox3Water );
|
|
else
|
|
retval = 0;
|
|
}
|
|
else if( filter == IM_REDUCE_FILTER_PLANTMAP )
|
|
{
|
|
if( bytesperpixel == 4 )
|
|
imReduceImageHalfBoxWork( dstdata, srcdata, width, height, bytesperpixel, bytesperline, imReduceHalfBox4Plant );
|
|
else
|
|
retval = 0;
|
|
}
|
|
else
|
|
retval = 0;
|
|
|
|
return retval;
|
|
}
|
|
|
|
|
|
int imReduceImageHalfBox( imgImage *imgdst, imgImage *imgsrc, imReduceOptions *options )
|
|
{
|
|
int newwidth, newheight, retvalue;
|
|
|
|
newwidth = ( ( imgsrc->format.width < 2 ) ? 1 : ( ( imgsrc->format.width + 1 ) / 2 ) );
|
|
newheight = ( ( imgsrc->format.height < 2 ) ? 1 : ( ( imgsrc->format.height + 1 ) / 2 ) );
|
|
|
|
imgdst->format.width = newwidth;
|
|
imgdst->format.height = newheight;
|
|
imgdst->format.type = imgsrc->format.type;
|
|
imgdst->format.bytesperpixel = imgsrc->format.bytesperpixel;
|
|
imgdst->format.bytesperline = imgdst->format.width * imgdst->format.bytesperpixel;
|
|
imgdst->data = malloc( imgdst->format.height * imgdst->format.bytesperline );
|
|
|
|
retvalue = imReduceImageHalfBoxData( imgdst->data, imgsrc->data, imgsrc->format.width, imgsrc->format.height, imgsrc->format.bytesperpixel, imgsrc->format.bytesperline, options );
|
|
|
|
return retvalue;
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
int imBuildMipmapCascade( imMipmapCascade *cascade, void *imagedata, int width, int height, int layercount, int bytesperpixel, int bytesperline, imReduceOptions *options, int cascadeflags )
|
|
{
|
|
int layerindex, level, srclevel, srcwidth, srcheight, method, divisor;
|
|
int levelwidth, levelheight;
|
|
void *src, *dst;
|
|
|
|
cascade->width = width;
|
|
cascade->height = height;
|
|
cascade->layercount = layercount;
|
|
cascade->bytesperpixel = bytesperpixel;
|
|
cascade->bytesperline = bytesperline;
|
|
cascade->options = options;
|
|
|
|
/* No need for mipmaps */
|
|
if( ( cascade->width == 1 ) && ( cascade->height == 1 ) )
|
|
return 1;
|
|
if( bytesperpixel != 4 )
|
|
cascadeflags &= ~( IM_CASCADE_FLAGS_COLOR_BORDER_BASE | IM_CASCADE_FLAGS_COLOR_BORDER_MIPMAPS );
|
|
|
|
/* Allocate all the mipmap levels */
|
|
if( !( layercount ) )
|
|
layercount = 1;
|
|
cascade->mipmap[0] = imagedata;
|
|
levelwidth = cascade->width;
|
|
levelheight = cascade->height;
|
|
for( level = 1 ; ; level++ )
|
|
{
|
|
levelwidth = ( levelwidth < 2 ) ? 1 : ( levelwidth >> 1 );
|
|
levelheight = ( levelheight < 2 ) ? 1 : ( levelheight >> 1 );
|
|
if( !( cascade->mipmap[level] = malloc( levelwidth * levelheight * layercount * bytesperpixel ) ) )
|
|
return 0;
|
|
if( ( levelwidth == 1 ) && ( levelheight == 1 ) )
|
|
break;
|
|
}
|
|
cascade->mipmap[level+1] = 0;
|
|
|
|
if( cascadeflags & IM_CASCADE_FLAGS_COLOR_BORDER_BASE )
|
|
imPropagateAlphaBorder( imagedata, width, height * layercount, bytesperpixel, bytesperline );
|
|
|
|
/* For every layer, compute all its mipmap */
|
|
for( layerindex = 0 ; layerindex < layercount ; layerindex++ )
|
|
{
|
|
levelwidth = cascade->width;
|
|
levelheight = cascade->height;
|
|
for( level = 1 ; cascade->mipmap[level] ; level++ )
|
|
{
|
|
levelwidth = ( levelwidth < 2 ) ? 1 : ( levelwidth >> 1 );
|
|
levelheight = ( levelheight < 2 ) ? 1 : ( levelheight >> 1 );
|
|
dst = ADDRESS( cascade->mipmap[level], layerindex * levelwidth * levelheight * bytesperpixel );
|
|
|
|
/* Decide what method and source level to pick */
|
|
if( ( levelwidth | levelheight ) >= 16 )
|
|
{
|
|
srclevel = level - 2;
|
|
if( srclevel < 0 )
|
|
srclevel = 0;
|
|
method = 1;
|
|
}
|
|
else
|
|
{
|
|
srclevel = level - 1;
|
|
method = 0;
|
|
}
|
|
#if DEBUG_VERBOSE
|
|
printf( "Tex level %d, srclevel %d, layer %d, filter %d, method %d : %d x %d\n", level, srclevel, layerindex, options->filter, method, levelwidth, levelheight );
|
|
#endif
|
|
srcwidth = width >> srclevel;
|
|
if( !( srcwidth ) )
|
|
srcwidth = 1;
|
|
srcheight = height >> srclevel;
|
|
if( !( srcheight ) )
|
|
srcheight = 1;
|
|
if( srclevel )
|
|
src = ADDRESS( cascade->mipmap[srclevel], layerindex * srcheight * srcwidth * bytesperpixel );
|
|
else
|
|
src = ADDRESS( cascade->mipmap[srclevel], layerindex * srcheight * cascade->bytesperline );
|
|
|
|
divisor = 1 << ( level - srclevel );
|
|
if( ( ( levelwidth * divisor ) != srcwidth ) || ( ( levelheight * divisor ) != srcheight ) )
|
|
method = 2;
|
|
|
|
if( method == 2 )
|
|
{
|
|
if( !( imReduceImageKaiserData( dst, src, srcwidth, srcheight, bytesperpixel, srcwidth * bytesperpixel, levelwidth, levelheight, options ) ) )
|
|
{
|
|
printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
|
|
return 0;
|
|
}
|
|
}
|
|
else if( method == 1 )
|
|
{
|
|
if( !( imReduceImageKaiserDataDivisor( dst, src, srcwidth, srcheight, bytesperpixel, srcwidth * bytesperpixel, divisor, options ) ) )
|
|
{
|
|
printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( !( imReduceImageHalfBoxData( dst, src, srcwidth, srcheight, bytesperpixel, srcwidth * bytesperpixel, options ) ) )
|
|
{
|
|
printf( "ERROR AT %s:%d\n", __FILE__, __LINE__ );
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if( cascadeflags & IM_CASCADE_FLAGS_COLOR_BORDER_MIPMAPS )
|
|
imPropagateAlphaBorder( dst, levelwidth, levelheight, bytesperpixel, levelwidth * bytesperpixel );
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
void imFreeMipmapCascade( imMipmapCascade *cascade )
|
|
{
|
|
int level;
|
|
for( level = 1 ; ; level++ )
|
|
{
|
|
if( !( cascade->mipmap[level] ) )
|
|
break;
|
|
free( cascade->mipmap[level] );
|
|
cascade->mipmap[level] = 0;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
////
|
|
|
|
|
|
#define IM_PIXEL_ALPHA_MASK (0xff000000)
|
|
#define IM_PIXEL_RGB_MASK (0x00ffffff)
|
|
|
|
void imPropagateAlphaBorder( unsigned char *imagedata, int width, int height, int bytesperpixel, int bytesperline )
|
|
{
|
|
int x, y, backtrackflag;
|
|
uint32_t pixel, refcolor, prevrowpixel;
|
|
uint32_t *row, *prevrow;
|
|
|
|
if( bytesperpixel != 4 )
|
|
return;
|
|
row = (uint32_t *)imagedata;
|
|
prevrow = row;
|
|
for( y = 0 ; y < height ; y++ )
|
|
{
|
|
refcolor = 0;
|
|
backtrackflag = 0;
|
|
for( x = 0 ; x < width ; x++ )
|
|
{
|
|
pixel = row[x];
|
|
prevrowpixel = prevrow[x];
|
|
if( pixel & IM_PIXEL_ALPHA_MASK )
|
|
{
|
|
/* Pixel has some color, spread to neighbor if applicable */
|
|
refcolor = pixel & IM_PIXEL_RGB_MASK;
|
|
if( backtrackflag )
|
|
{
|
|
row[x-1] = refcolor;
|
|
backtrackflag = 0;
|
|
}
|
|
if( !( prevrowpixel & IM_PIXEL_ALPHA_MASK ) )
|
|
prevrow[x] = refcolor;
|
|
}
|
|
else
|
|
{
|
|
/* Pixel is fully transparent, spread from neighbor if applicable */
|
|
if( refcolor )
|
|
{
|
|
row[x] = refcolor;
|
|
backtrackflag = 0;
|
|
refcolor = 0;
|
|
}
|
|
else if( prevrowpixel & IM_PIXEL_ALPHA_MASK )
|
|
{
|
|
row[x] = prevrowpixel & IM_PIXEL_RGB_MASK;
|
|
backtrackflag = 0;
|
|
}
|
|
else
|
|
backtrackflag = 1;
|
|
}
|
|
}
|
|
prevrow = row;
|
|
row = ADDRESS( row, bytesperline );
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|