diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index 1f05c6dfa..3ce6c83cb 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -22,28 +22,17 @@ // //////////////////////////////////////////////////////////////// -//#include "rtengine.h" #include #include -#include "curves.h" #include "array2D.h" -#include "improcfun.h" #include "rawimagesource.h" -//#include "stack1.h" - +#include "rt_math.h" +#include "opthelper.h" #ifdef _OPENMP #include #endif -#include "rt_math.h" -#include "rawimagesource.h" -#ifdef __SSE2__ -#include "sleefsseavx.c" -#endif // __SSE2__ - - - #define FOREACHCOLOR for (int c=0; c < ColorCount; c++) @@ -54,13 +43,8 @@ namespace rtengine { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void RawImageSource::boxblur2(float** src, float** dst, int H, int W, int box ) -#else -void RawImageSource::boxblur2(float** src, float** dst, int H, int W, int box ) -#endif +SSEFUNCTION void RawImageSource::boxblur2(float** src, float** dst, int H, int W, int box ) { - array2D temp(W,H); //box blur image channel; box size = 2*box+1 @@ -97,30 +81,68 @@ void RawImageSource::boxblur2(float** src, float** dst, int H, int W, int box ) __m128 lenv = _mm_set1_ps( len ); __m128 lenp1v = _mm_set1_ps( len + 1.0f ); __m128 onev = _mm_set1_ps( 1.0f ); - __m128 tempv; + __m128 tempv,temp2v; #ifdef _OPENMP -#pragma omp for +#pragma omp for nowait #endif - for (int col = 0; col < W-3; col+=4) { + for (int col = 0; col < W-7; col+=8) { + tempv = LVFU(temp[0][col]) / lenv; + temp2v = LVFU(temp[0][col+4]) / lenv; + for (int i=1; i<=box; i++) { + tempv = tempv + LVFU(temp[i][col]) / lenv; + temp2v = temp2v + LVFU(temp[i][col+4]) / lenv; + } + _mm_storeu_ps( &dst[0][col], tempv); + _mm_storeu_ps( &dst[0][col+4], temp2v); + for (int row=1; row<=box; row++) { + tempv = (tempv * lenv + LVFU(temp[(row+box)][col]))/lenp1v; + temp2v = (temp2v * lenv + LVFU(temp[(row+box)][col+4]))/lenp1v; + _mm_storeu_ps( &dst[row][col], tempv); + _mm_storeu_ps( &dst[row][col+4], temp2v); + lenv = lenp1v; + lenp1v = lenp1v + onev; + } + for (int row = box+1; row < H-box; row++) { + tempv = tempv + (LVFU(temp[(row+box)][col]) - LVFU(temp[(row-box-1)][col]))/lenv; + temp2v = temp2v + (LVFU(temp[(row+box)][col+4]) - LVFU(temp[(row-box-1)][col+4]))/lenv; + _mm_storeu_ps( &dst[row][col], tempv); + _mm_storeu_ps( &dst[row][col+4], temp2v); + } + for (int row=H-box; row temp((W/samp)+ ((W%samp)==0 ? 0 : 1),H); - float maxtmp=0.0f; - #ifdef _OPENMP #pragma omp parallel #endif { - float maxtmpthr = 0; float tempval; #ifdef _OPENMP #pragma omp for @@ -189,42 +207,29 @@ void RawImageSource::boxblur_resamp(float **src, float **dst, float & max_f, int for (int row = 0; row < H; row++) { int len = box + 1; tempval = src[row][0]/len; - maxtmpthr = max(maxtmpthr,src[row][0]); for (int j=1; j<=box; j++) { tempval += src[row][j]/len; - maxtmpthr = max(maxtmpthr,src[row][j]); } temp[row][0] = tempval; for (int col=1; col<=box; col++) { tempval = (tempval*len + src[row][col+box])/(len+1); if(col%samp == 0) temp[row][col/samp] = tempval; - maxtmpthr = max(maxtmpthr,src[row][col]); len ++; } for (int col = box+1; col < W-box; col++) { tempval = tempval + (src[row][col+box] - src[row][col-box-1])/len; if(col%samp == 0) temp[row][col/samp] = tempval; - maxtmpthr = max(maxtmpthr,src[row][col]); } for (int col=W-box; col hfsize (hfw+1,hfh+1,ARRAY2D_CLEAR_DATA); boxblur_resamp(red,hfsize[0],chmaxalt[0],height,width,range,pitch); @@ -339,7 +344,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b //for (int m=0; m<3; m++) // boxblur2(hfsize[m],hfsizeblur[m],hfh,hfw,3); - +*/ //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -385,7 +390,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b // set up which pixels are clipped or near clipping #ifdef _OPENMP -#pragma omp parallel for reduction(+:hipass_sum,hipass_norm) +#pragma omp parallel for reduction(+:hipass_sum,hipass_norm) schedule(dynamic,16) #endif for (int i=0; isetProgress(progress); @@ -489,9 +494,6 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b //fill gaps in highlight map by directional extension //raster scan from four corners -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int j=1; jsetProgress(progress); } -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int j=hfw-2; j>0; j--) for (int i=2; isetProgress(progress); } -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int i=1; isetProgress(progress); } -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int i=hfh-2; i>0; i--) for (int j=2; jsetProgress(progress); } -#ifdef _OPENMP -#pragma omp parallel for -#endif //fill in edges for (int dir=0; dirverbose) printf ("Applying Highlight Recovery: Color propagation...\n"); - HLRecovery_inpaint (red,green,blue); - rgbSourceModified = true; - } - else{ - rgbSourceModified = false; + if(!rgbSourceModified) { + if (settings->verbose) printf ("Applying Highlight Recovery: Color propagation...\n"); + HLRecovery_inpaint (red,green,blue); + rgbSourceModified = true; + } } } diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index a5197d03a..3c48640f8 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -188,7 +188,7 @@ class RawImageSource : public ImageSource { static void inverse33 (const double (*coeff)[3], double (*icoeff)[3]); void boxblur2(float** src, float** dst, int H, int W, int box ); - void boxblur_resamp(float **src, float **dst, float & max, int H, int W, int box, int samp ); + void boxblur_resamp(float **src, float **dst, int H, int W, int box, int samp ); //void boxblur_resamp(float **red, float **green, float **blue, int H, int W, float thresh[3], float max[3], // multi_array2D & hfsize, multi_array2D & hilite, int box );