Speedup for Vibrance and Contrast by Detail Levels, Issue 2520

2014-10-04 12:10:31 +02:00
parent e182daaddc
commit 96d80d1102
5 changed files with 723 additions and 419 deletions
--- a/rtengine/dirpyr_equalizer.cc
+++ b/rtengine/dirpyr_equalizer.cc
@@ -24,15 +24,11 @@
 #include "labimage.h"
 #include "color.h"
 #include "mytime.h"
-//#include "StopWatch.h"
-
 #include "improcfun.h"
 #include "rawimagesource.h"
 #include "array2D.h"
 #include "rt_math.h"
-#ifdef __SSE2__
-#include "sleefsseavx.c"
-#endif
+#include "opthelper.h"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
@@ -46,20 +42,16 @@ namespace rtengine {
 	
 	static const int maxlevel = 5;
 	static const float noise = 2000;
-	static const float thresh = 1000;
 	
 	//sequence of scales
-	static const int scales[8] = {1,2,4,8,16,32,64,128};
+	static const int scales[5] = {1,2,4,8,16};
 	extern const Settings* settings;
 	
 	//sequence of scales
 	
 	
-	void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, float ** dest_a, float ** dest_b,const double * mult, const double dirpyrThreshold, const double skinprot, const bool gamutlab, float b_l, float t_l, float t_r, float b_r, int choice, int scaleprev)
+SSEFUNCTION void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, float ** dest_a, float ** dest_b,const double * mult, const double dirpyrThreshold, const double skinprot, const bool gamutlab, float b_l, float t_l, float t_r, float b_r, int choice, int scaleprev)
 	{
-	//	StopWatch Stop1("Dirpyr equalizer");
-
-
 		int lastlevel=maxlevel;
 		if(settings->verbose) printf("Dirpyr scaleprev=%i\n",scaleprev);
 		float atten123=(float) settings->level123_cbdl;
@@ -69,6 +61,9 @@ namespace rtengine {
 		if(atten0 > 40.f) atten123=40.f;
 		if(atten0 < 0.f) atten0=0.f;
 		
+		if((t_r-t_l)<0.55f)
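+			t_l = t_r + 0.55f;//meant to avoid a too small t_l..t_r range; NOTE(review): after this assignment t_r-t_l is -0.55f - confirm whether t_r = t_l + 0.55f was intended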
+
 		
 		while (lastlevel>0 && fabs(mult[lastlevel-1]-1)<0.001) {
 			lastlevel--;
@@ -97,7 +92,7 @@ namespace rtengine {
 		if(scale < 1) scale=1;

 				
-		dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale, l_a, l_b, false );
+		dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale);
 		
 		level = 1;
 		
@@ -107,48 +102,103 @@ namespace rtengine {
 			scale = (int)(scales[level])/scaleprev;
 			if(scale < 1) scale=1;
 			
-			dirpyr_channel(dirpyrlo[level-1], dirpyrlo[level], srcwidth, srcheight, level, scale, l_a, l_b, false );
+			dirpyr_channel(dirpyrlo[level-1], dirpyrlo[level], srcwidth, srcheight, level, scale);
 			
 			level ++;
 		}
 		
+		float **tmpHue,**tmpChr;
+		if(skinprot != 0.f) {
+			// precalculate hue and chroma, use SSE, if available
+			// by precalculating these values we can greatly reduce the number of calculations in idirpyr_eq_channel()
+			// but we need two additional buffers for this preprocessing
+			tmpHue = new float*[srcheight];
+			for (int i=0; i<srcheight; i++) {
+				tmpHue[i] = new float[srcwidth];
+			}
+#ifdef __SSE2__
+#pragma omp parallel for
+			for(int i=0;i<srcheight;i++) {
+				int j;
+				for(j=0;j<srcwidth-3;j+=4) {
+					_mm_storeu_ps(&tmpHue[i][j],xatan2f(LVFU(l_b[i][j]),LVFU(l_a[i][j])));
+				}
+				for(;j<srcwidth;j++) {
+					tmpHue[i][j] = xatan2f(l_b[i][j],l_a[i][j]);
+				}
+			}
+#else
+#pragma omp parallel for
+			for(int i=0;i<srcheight;i++) {
+				for(int j=0;j<srcwidth;j++) {
+					tmpHue[i][j] = xatan2f(l_b[i][j],l_a[i][j]);
+				}
+			}
+#endif
+			tmpChr = new float*[srcheight];
+			for (int i=0; i<srcheight; i++) {
+				tmpChr[i] = new float[srcwidth];
+			}
+			
+#ifdef __SSE2__
+#pragma omp parallel
+{
+			__m128 div = _mm_set1_ps(327.68f);
+#pragma omp for
+			for(int i=0;i<srcheight;i++) {
+				int j;
+				for(j=0;j<srcwidth-3;j+=4) {
+					_mm_storeu_ps(&tmpChr[i][j], _mm_sqrt_ps(SQRV(LVFU(l_b[i][j]))+SQRV(LVFU(l_a[i][j])))/div);
+				}
+				for(;j<srcwidth;j++) {
+					tmpChr[i][j] = sqrtf(SQR((l_b[i][j]))+SQR((l_a[i][j])))/327.68f;
+				}
+			}
+}
+#else
+#pragma omp parallel for
+			for(int i=0;i<srcheight;i++) {
+				for(int j=0;j<srcwidth;j++) {
+					tmpChr[i][j] = sqrtf(SQR((l_b[i][j]))+SQR((l_a[i][j])))/327.68f;
+				}
+			}
+#endif
+		}
+
 		// with the current implementation of idirpyr_eq_channel we can safely use the buffer from last level as buffer, saves some memory
 		float ** buffer = dirpyrlo[lastlevel-1];
 		
 		for(int level = lastlevel - 1; level > 0; level--)
 		{
-			idirpyr_eq_channel(dirpyrlo[level], dirpyrlo[level-1], buffer, srcwidth, srcheight, level, multi, dirpyrThreshold, l_a, l_b, false, skinprot, gamutlab, b_l,t_l,t_r,b_r, choice );
+			idirpyr_eq_channel(dirpyrlo[level], dirpyrlo[level-1], buffer, srcwidth, srcheight, level, multi, dirpyrThreshold, tmpHue, tmpChr, skinprot, gamutlab, b_l,t_l,t_r,b_r, choice );
 		}
-		
-		
+
 		scale = scales[0];
 		
-		idirpyr_eq_channel(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi, dirpyrThreshold, l_a, l_b, false, skinprot, gamutlab, b_l,t_l,t_r,b_r, choice );
-		
-		
+		idirpyr_eq_channel(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi, dirpyrThreshold, tmpHue, tmpChr, skinprot, gamutlab, b_l,t_l,t_r,b_r, choice );
+
+		if(skinprot != 0.f) {
+			for (int i=0; i<srcheight; i++)
+				delete [] tmpChr[i];
+			delete [] tmpChr;
+			for (int i=0; i<srcheight; i++)
+				delete [] tmpHue[i];
+			delete [] tmpHue;
+		}
+
 		//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-		
+#pragma omp parallel for
 		for (int i=0; i<srcheight; i++) 
 			for (int j=0; j<srcwidth; j++) {
-				dst[i][j] = CLIP(  buffer[i][j] );  // TODO: Really a clip necessary?
-				dest_a[i][j] = l_a[i][j];  
-				dest_b[i][j] = l_b[i][j]; 
-								
+				dst[i][j] = CLIP(buffer[i][j]);  // TODO: Really a clip necessary?
 			}
-		//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-		
-		
-				
-		
-		//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 	}


 	
 	void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float ** dst, int srcwidth, int srcheight, float ** h_p, float ** C_p, const double * mult, const double dirpyrThreshold, const double skinprot, bool execdir,  const bool gamutlab, float b_l, float t_l, float t_r, float b_r, int choice, int scaleprev)
 	{
-		//	StopWatch Stop1("Dirpyr equalizer CAM");
-
 		int lastlevel=maxlevel;
 		if(settings->verbose) printf("CAM dirpyr scaleprev=%i\n",scaleprev);
 		float atten123=(float) settings->level123_cbdl;
@@ -159,6 +209,9 @@ namespace rtengine {
 		if(atten0 > 40.f) atten123=40.f;
 		if(atten0 < 0.f) atten0=0.f;

+		if((t_r-t_l)<0.55f)
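+			t_l = t_r + 0.55f;//meant to avoid a too small t_l..t_r range; NOTE(review): after this assignment t_r-t_l is -0.55f - confirm whether t_r = t_l + 0.55f was intended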
+
 		while (fabs(mult[lastlevel-1]-1)<0.001 && lastlevel>0) {
 			lastlevel--;
 			//printf("last level to process %d \n",lastlevel);
@@ -190,7 +243,7 @@ namespace rtengine {
 		int scale = (int)(scales[level])/scaleprev;
 		if(scale < 1) scale=1;

-		dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale, h_p, C_p, true );
+		dirpyr_channel(src, dirpyrlo[0], srcwidth, srcheight, 0, scale);
 		
 		level = 1;
 		
@@ -199,7 +252,7 @@ namespace rtengine {
 			scale = (int)(scales[level])/scaleprev;
 			if(scale < 1) scale=1;

-			dirpyr_channel(dirpyrlo[level-1], dirpyrlo[level], srcwidth, srcheight, level, scale, h_p, C_p, true );
+			dirpyr_channel(dirpyrlo[level-1], dirpyrlo[level], srcwidth, srcheight, level, scale);
 			
 			level ++;
 		}
@@ -210,19 +263,19 @@ namespace rtengine {
 		
 		for(int level = lastlevel - 1; level > 0; level--)
 		{
-			idirpyr_eq_channel(dirpyrlo[level], dirpyrlo[level-1], buffer, srcwidth, srcheight, level, multi, dirpyrThreshold , h_p, C_p, true, skinprot, false, b_l,t_l,t_r,b_r, choice);
+			idirpyr_eq_channelcam(dirpyrlo[level], dirpyrlo[level-1], buffer, srcwidth, srcheight, level, multi, dirpyrThreshold , h_p, C_p, skinprot, b_l,t_l,t_r);
 		}
 		
 		
 		scale = scales[0];
 		
-		idirpyr_eq_channel(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi, dirpyrThreshold,  h_p, C_p, true, skinprot, false, b_l,t_l,t_r,b_r, choice);
+		idirpyr_eq_channelcam(dirpyrlo[0], dst, buffer, srcwidth, srcheight, 0, multi, dirpyrThreshold,  h_p, C_p, skinprot, b_l,t_l,t_r);
 		
 		
 		//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 		if(execdir){
 #ifdef _OPENMP
-#pragma omp parallel for
+#pragma omp parallel for schedule(dynamic,16)
 #endif
 			for (int i=0; i<srcheight; i++) 
 				for (int j=0; j<srcwidth; j++) {
@@ -241,44 +294,36 @@ namespace rtengine {
 	}


-#if defined( __SSE2__ ) && defined( WIN32 )
-__attribute__((force_align_arg_pointer)) void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale, float ** l_a_h, float ** l_b_c, bool ciec)
-#else
-void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale, float ** l_a_h, float ** l_b_c, bool ciec )
-#endif
+SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale)
 {
 		//scale is spacing of directional averaging weights
-		
 		//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 		// calculate weights, compute directionally weighted average
 		
-	int halfwin;
-	int scalewin;
-		
 	if(level > 1) {
 		//generate domain kernel 
 		int domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,2,2,1},{1,2,2,2,1},{1,1,1,1,1}};
-		halfwin=2;
-		scalewin = halfwin*scale;
+		static const int halfwin=2;
+		const int scalewin = halfwin*scale;
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
 {
 #ifdef __SSE2__
 	__m128 thousandv = _mm_set1_ps( 1000.0f );
-	__m128 dirwtv, valv, normv;
-	float domkerv[5][5][4] = {{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}}};
+	__m128 dirwtv, valv, normv, dftemp1v, dftemp2v;
+//	multiplied each value of domkerv by 1000 to avoid multiplication by 1000 inside the loop
+	float domkerv[5][5][4] __attribute__ ((aligned (16))) = {{{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000}},{{1000,1000,1000,1000},{2000,2000,2000,2000},{2000,2000,2000,2000},{2000,2000,2000,2000},{1000,1000,1000,1000}},{{1000,1000,1000,1000},{2000,2000,2000,2000},{2000,2000,2000,2000},{2000,2000,2000,2000},{1000,1000,1000,1000}},{{1000,1000,1000,1000},{2000,2000,2000,2000},{2000,2000,2000,2000},{2000,2000,2000,2000},{1000,1000,1000,1000}},{{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000},{1000,1000,1000,1000}}};
 #endif // __SSE2__
 	int j;
 #ifdef _OPENMP
-#pragma omp for
+#pragma omp for //schedule (dynamic,8)
 #endif
 		for(int i = 0; i < height; i++) {
 			float dirwt;
-			for(j = 0; j < scalewin; j++)
-			{
-				float val=0;
-				float norm=0;
+			for(j = 0; j < scalewin; j++) {
+				float val=0.f;
+				float norm=0.f;
 				
 				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=max(0,j-scalewin); jnbr<=j+scalewin; jnbr+=scale) {
@@ -294,11 +339,13 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 			{
 				valv = _mm_setzero_ps();
 				normv = _mm_setzero_ps();
-				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
-						dirwtv = _mm_loadu_ps((float*)&domkerv[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin]) * (thousandv / (vabsf(LVFU(data_fine[inbr][jnbr])-(LVFU(data_fine[i][j]))) + thousandv));
-						valv += dirwtv*LVFU(data_fine[inbr][jnbr]);
+				dftemp1v = LVFU(data_fine[i][j]);
+				for(int inbr=MAX(0,i-scalewin); inbr<=MIN(height-1,i+scalewin); inbr+=scale) {
+					int indexihlp = (inbr-i)/scale+halfwin;
+					for (int jnbr=j-scalewin, indexjhlp = 0; jnbr<=j+scalewin; jnbr+=scale,indexjhlp++) {
+						dftemp2v = LVFU(data_fine[inbr][jnbr]);
+						dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v-dftemp2v) + thousandv);
+						valv += dirwtv*dftemp2v;
 						normv += dirwtv;
 					}
 				}
@@ -306,8 +353,8 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 			}
 			for(; j < width-scalewin; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
 				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
@@ -321,8 +368,8 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 #else
 			for(; j < width-scalewin; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
 				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
@@ -336,8 +383,8 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 #endif
 			for(; j < width; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
 				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=j-scalewin; jnbr<=min(width-1,j+scalewin); jnbr+=scale) {
@@ -351,29 +398,28 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 		}
 }
 	} else {	// level <=1 means that all values of domker would be 1.0f, so no need for multiplication
-		halfwin = 1;
-		scalewin = halfwin*scale;
+//		const int scalewin = scale;
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
 {
 #ifdef __SSE2__
 	__m128 thousandv = _mm_set1_ps( 1000.0f );
-	__m128 dirwtv, valv, normv;
+	__m128 dirwtv, valv, normv, dftemp1v, dftemp2v;
 #endif // __SSE2__
 	int j;
 #ifdef _OPENMP
-#pragma omp for
+#pragma omp for schedule(dynamic,16)
 #endif
 		for(int i = 0; i < height; i++) {
 			float dirwt;
-			for(j = 0; j < scalewin; j++)
+			for(j = 0; j < scale; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=max(0,j-scalewin); jnbr<=j+scalewin; jnbr+=scale) {
+				for(int inbr=max(0,i-scale); inbr<=min(height-1,i+scale); inbr+=scale) {
+					for (int jnbr=max(0,j-scale); jnbr<=j+scale; jnbr+=scale) {
 						dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr]-data_fine[i][j]));
 						val += dirwt*data_fine[inbr][jnbr];
 						norm += dirwt;
@@ -382,28 +428,29 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 				data_coarse[i][j]=val/norm;//low pass filter
 			}
 #ifdef __SSE2__
-			for(; j < width-scalewin-3; j+=4)
+			for(; j < width-scale-3; j+=4)
 			{
 				valv = _mm_setzero_ps();
 				normv = _mm_setzero_ps();
-				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
-						dirwtv = thousandv / (vabsf(LVFU(data_fine[inbr][jnbr])-(LVFU(data_fine[i][j]))) + thousandv);
-						valv += dirwtv*LVFU(data_fine[inbr][jnbr]);
+				dftemp1v = LVFU(data_fine[i][j]);
+				for(int inbr=MAX(0,i-scale); inbr<=MIN(height-1,i+scale); inbr+=scale) {
+					for (int jnbr=j-scale; jnbr<=j+scale; jnbr+=scale) {
+						dftemp2v = LVFU(data_fine[inbr][jnbr]);
+						dirwtv = thousandv / (vabsf(dftemp2v-dftemp1v) + thousandv);
+						valv += dirwtv*dftemp2v;
 						normv += dirwtv;
 					}
 				}
 				_mm_storeu_ps( &data_coarse[i][j], valv/normv);//low pass filter
 			}

-			for(; j < width-scalewin; j++)
+			for(; j < width-scale; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
+				for(int inbr=max(0,i-scale); inbr<=min(height-1,i+scale); inbr+=scale) {
+					for (int jnbr=j-scale; jnbr<=j+scale; jnbr+=scale) {
 						dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr]-data_fine[i][j]));
 						val += dirwt*data_fine[inbr][jnbr];
 						norm += dirwt;
@@ -412,13 +459,13 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 				data_coarse[i][j]=val/norm;//low pass filter
 			}
 #else
-			for(; j < width-scalewin; j++)
+			for(; j < width-scale; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
+				for(int inbr=max(0,i-scale); inbr<=min(height-1,i+scale); inbr+=scale) {
+					for (int jnbr=j-scale; jnbr<=j+scale; jnbr+=scale) {
 						dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr]-data_fine[i][j]));
 						val += dirwt*data_fine[inbr][jnbr];
 						norm += dirwt;
@@ -429,11 +476,11 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 #endif
 			for(; j < width; j++)
 			{
-				float val=0;
-				float norm=0;
+				float val=0.f;
+				float norm=0.f;
 				
-				for(int inbr=max(0,i-scalewin); inbr<=min(height-1,i+scalewin); inbr+=scale) {
-					for (int jnbr=j-scalewin; jnbr<=min(width-1,j+scalewin); jnbr+=scale) {
+				for(int inbr=max(0,i-scale); inbr<=min(height-1,i+scale); inbr+=scale) {
+					for (int jnbr=j-scale; jnbr<=min(width-1,j+scale); jnbr+=scale) {
 						dirwt = RANGEFN(fabsf(data_fine[inbr][jnbr]-data_fine[i][j]));
 						val += dirwt*data_fine[inbr][jnbr];
 						norm += dirwt;
@@ -448,81 +495,157 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 	
 	//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
-	void ImProcFunctions::idirpyr_eq_channel(float ** data_coarse, float ** data_fine, float ** buffer, int width, int height, int level, float mult[5], const double dirpyrThreshold, float ** l_a_h, float ** l_b_c, bool ciec, const double skinprot, const bool gamutlab, float b_l, float t_l, float t_r, float b_r , int choice)
+	void ImProcFunctions::idirpyr_eq_channel(float ** data_coarse, float ** data_fine, float ** buffer, int width, int height, int level, float mult[5], const double dirpyrThreshold, float ** hue, float ** chrom, const double skinprot, const bool gamutlab, float b_l, float t_l, float t_r, float b_r , int choice)  // adds the gain-weighted high-pass (data_fine - data_coarse) for detail level `level` into buffer; hue/chrom are only read when skinprot != 0; gamutlab, b_r and choice are not read in this body
 	{
-	TMatrix wiprof = iccStore->workingSpaceInverseMatrix (params->icm.working);
-	double wip[3][3] = {
-		{wiprof[0][0],wiprof[0][1],wiprof[0][2]},
-		{wiprof[1][0],wiprof[1][1],wiprof[1][2]},
-		{wiprof[2][0],wiprof[2][1],wiprof[2][2]}
-	};
-	bool highlight = params->toneCurve.hrenabled; //Get the value if "highlight reconstruction" is activated
+	const float skinprotneg = -skinprot;  // value handed to SkinSatCbdl in the skinprot < 0 branch
+	const float factorHard = (1.f - skinprotneg/100.f);  // weight applied to the LUT value in the skinprot < 0 branch when scale is still 1.f after SkinSatCbdl

-		float noisehi = 1.33f*noise*dirpyrThreshold/expf(level*log(3.0)), noiselo = 0.66f*noise*dirpyrThreshold/expf(level*log(3.0));
+	float offs;  // offset baked into the LUT so the skin branches can write "1.f + lut*..." instead of "1.f + (lut-1.f)*..."
+	if(skinprot==0.f)
+		offs = 0.f;  // LUT holds the gain itself
+	else
+		offs = -1.f;  // LUT holds gain - 1
+	
+	LUTf irangefn (0x20000);  // gain table for this level, looked up below with hipass+0x10000
+	{
+		const float noisehi = 1.33f*noise*dirpyrThreshold/expf(level*log(3.0)), noiselo = 0.66f*noise*dirpyrThreshold/expf(level*log(3.0));  // thresholds on |i-0x10000|; both are divided by 3^level
 		//printf("level=%i multlev=%f noisehi=%f noiselo=%f skinprot=%f\n",level,mult[level], noisehi, noiselo, skinprot);
-		LUTf irangefn (0x20000);
 		for (int i=0; i<0x20000; i++) {
 			if (abs(i-0x10000)>noisehi || mult[level]<1.0) {
-				irangefn[i] = mult[level] ;
+				irangefn[i] = mult[level] + offs;  // above noisehi, or whenever mult[level] < 1: full multiplier
 			} else {
 				if (abs(i-0x10000)<noiselo) {
-					irangefn[i] = 1.f ;
+					irangefn[i] = 1.f + offs ;  // below noiselo: gain 1
 				} else {
-					irangefn[i] = 1.f + (mult[level]-1.f) * (noisehi-abs(i-0x10000))/(noisehi-noiselo+0.01f) ;
+					irangefn[i] = 1.f + offs + (mult[level]-1.f) * (noisehi-abs(i-0x10000))/(noisehi-noiselo+0.01f) ;  // between the thresholds the (mult[level]-1.f) term fades to 0 as |i-0x10000| approaches noisehi; NOTE(review): values above noisehi jump back to mult[level] - confirm the ramp direction is intended
 				}
 			}
 		}
-		
+	}
+
+	if(skinprot==0.f)  // no skin protection: plain LUT gain, hue/chrom untouched
 #ifdef _OPENMP
-#pragma omp parallel for
+#pragma omp parallel for schedule(dynamic,16)
+#endif
+		for(int i = 0; i < height; i++) {
+			for(int j = 0; j < width; j++) {
+				float hipass = (data_fine[i][j]-data_coarse[i][j]);
+				buffer[i][j] += irangefn[hipass+0x10000] * hipass;
+			}
+		}
+	else if(skinprot > 0.f)  // positive skinprot: (gain - 1) is multiplied by scale after the SkinSatCbdl call
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic,16)
 #endif
 		for(int i = 0; i < height; i++) {
 			for(int j = 0; j < width; j++) {
 				float scale=1.f;
 				float hipass = (data_fine[i][j]-data_coarse[i][j]);
-				if(ciec) {//Ciecam
-					if(skinprot >= 0.) {
-						Color::SkinSatcdbl ((data_fine[i][j])/327.68f, l_a_h[i][j] ,l_b_c[i][j], skinprot, scale, ciec, true, b_l, t_l, t_r, b_r, choice);	
-						buffer[i][j] += (1.f +(irangefn[hipass+0x10000]-1.f)*scale) * hipass ;
-						}
-					else {
-						double skinprotneg = -skinprot;
-						float correct;
-						correct=irangefn[hipass+0x10000];
-						Color::SkinSatcdbl ((data_fine[i][j])/327.68f, l_a_h[i][j],l_b_c[i][j] , skinprotneg, scale, ciec, false, b_l, t_l, t_r, b_r, choice);	
-						if (scale == 1.f) {//image hard
-							//buffer[i][j] += hipass ;
-							buffer[i][j] += (1.f +(correct-1.f)* (1.f- (float) skinprotneg/100.f)) * hipass ;
-							
-						}
-						else {//image soft
-							buffer[i][j] += (1.f +(correct-1.f)) * hipass ;	
-						}		
-					}
+				// hue and chroma are read from the caller-supplied per-pixel arrays instead of being computed here
+				float modhue = hue[i][j];
+				float modchro = chrom[i][j];
+				Color::SkinSatCbdl ((data_fine[i][j])/327.68f, modhue, modchro, skinprot, scale, true, b_l, t_l, t_r);	
+				buffer[i][j] += (1.f +(irangefn[hipass+0x10000])*scale) * hipass ;
+			}
+		}
+	else  // negative skinprot: SkinSatCbdl is called with skinprotneg and the flag set to false
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic,16)
+#endif
+		for(int i = 0; i < height; i++) {
+			for(int j = 0; j < width; j++) {
+				float scale=1.f;
+				float hipass = (data_fine[i][j]-data_coarse[i][j]);
+				// hue and chroma are read from the caller-supplied per-pixel arrays instead of being computed here
+				float modhue = hue[i][j];
+				float modchro = chrom[i][j];
+				Color::SkinSatCbdl ((data_fine[i][j])/327.68f, modhue, modchro, skinprotneg, scale, false, b_l, t_l, t_r);	
+				float correct = irangefn[hipass+0x10000];  // gain - 1, because offs is -1 in this branch
+				if (scale == 1.f) {//image hard
+					buffer[i][j] += (1.f +(correct)* (factorHard)) * hipass ;
+				}
+				else {//image soft with scale < 1 ==> skin
+					buffer[i][j] += (1.f +(correct)) * hipass ;	
+				}		
+			}
+		}
+	}
+		
+	
+	void ImProcFunctions::idirpyr_eq_channelcam(float ** data_coarse, float ** data_fine, float ** buffer, int width, int height, int level, float mult[5], const double dirpyrThreshold, float ** l_a_h, float ** l_b_c, const double skinprot, float b_l, float t_l, float t_r)
+	{
+
+	const float skinprotneg = -skinprot;
+	const float factorHard = (1.f - skinprotneg/100.f);
+
+	float offs;
+	if(skinprot==0.f)
+		offs = 0.f;
+	else
+		offs = -1.f;
+	
+	LUTf irangefn (0x20000);
+	{
+		const float noisehi = 1.33f*noise*dirpyrThreshold/expf(level*log(3.0)), noiselo = 0.66f*noise*dirpyrThreshold/expf(level*log(3.0));
+		//printf("level=%i multlev=%f noisehi=%f noiselo=%f skinprot=%f\n",level,mult[level], noisehi, noiselo, skinprot);
+		for (int i=0; i<0x20000; i++) {
+			if (abs(i-0x10000)>noisehi || mult[level]<1.0) {
+				irangefn[i] = mult[level] + offs;
+			} else {
+				if (abs(i-0x10000)<noiselo) {
+					irangefn[i] = 1.f + offs ;
+				} else {
+					irangefn[i] = 1.f + offs + (mult[level]-1.f) * (noisehi-abs(i-0x10000))/(noisehi-noiselo+0.01f) ;
+				}
+			}
+		}
+	}
+	if(skinprot == 0.f)
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic,16)
+#endif
+		for(int i = 0; i < height; i++) {
+			for(int j = 0; j < width; j++) {
+				float hipass = (data_fine[i][j]-data_coarse[i][j]);
+				buffer[i][j] += irangefn[hipass+0x10000] * hipass ;
+			}
+		}
+	else if(skinprot > 0.f)
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic,16)
+#endif
+		for(int i = 0; i < height; i++) {
+			for(int j = 0; j < width; j++) {
+				float hipass = (data_fine[i][j]-data_coarse[i][j]);
+				float scale=1.f;
+				Color::SkinSatCbdlCam ((data_fine[i][j])/327.68f, l_a_h[i][j] ,l_b_c[i][j], skinprot, scale, true, b_l, t_l, t_r);	
+				buffer[i][j] += (1.f +(irangefn[hipass+0x10000])*scale) * hipass ;
+			}
+		}
+	else
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic,16)
+#endif
+		for(int i = 0; i < height; i++) {
+			for(int j = 0; j < width; j++) {
+				float hipass = (data_fine[i][j]-data_coarse[i][j]);
+				float scale=1.f;
+				float correct;
+				correct=irangefn[hipass+0x10000];
+				Color::SkinSatCbdlCam ((data_fine[i][j])/327.68f, l_a_h[i][j],l_b_c[i][j] , skinprotneg, scale, false, b_l, t_l, t_r);	
+				if (scale == 1.f) {//image hard
+					buffer[i][j] += (1.f +(correct)* factorHard) * hipass ;
+					
+				}
+				else {//image soft
+					buffer[i][j] += (1.f +(correct)) * hipass ;	
+				}		
+			}
+		}
 			//	if(gamutlab) {
 			//	  ImProcFunctions::badpixcam (buffer[i][j], 6.0, 10, 2);//for bad pixels
 			//	}	
 						
-				}
-				else {//lab
-				float modhue=atan2(l_b_c[i][j],l_a_h[i][j]);
-				float modchro=sqrt(SQR((l_b_c[i][j])/327.68f)+SQR((l_a_h[i][j])/327.68f));
-					if(skinprot >= 0.) {
-						Color::SkinSatcdbl ((data_fine[i][j])/327.68f, modhue, modchro, skinprot, scale, ciec, true, b_l, t_l, t_r, b_r, choice);	
-						buffer[i][j] += (1.f +(irangefn[hipass+0x10000]-1.f)*scale) * hipass ;
-					}
-					else {
-						double skinprotneg = -skinprot;
-						float correct;
-						Color::SkinSatcdbl ((data_fine[i][j])/327.68f, modhue, modchro, skinprotneg, scale, ciec, false, b_l, t_l, t_r, b_r, choice);	
-						correct=irangefn[hipass+0x10000];
-						if (scale == 1.f) {//image hard
-							buffer[i][j] += (1.f +(correct-1.f)* (1.f- (float)skinprotneg/100.f)) * hipass ;
-						}
-						else {//image soft with scale < 1 ==> skin
-							buffer[i][j] += (1.f +(correct-1.f)) * hipass ;	
-						}		
-				}
 		/*		if(gamutlab) {//disabled 
 				float Lprov1=(buffer[i][j])/327.68f;
 				float R,G,B;
@@ -542,12 +665,8 @@ void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, i
 					l_b_c[i][j]=327.68f*modchro*sincosval.x;
 				}	
 				*/
-				}
-			}
-		}
-		
 	}
-	
+		
 			//	float hipass = (data_fine[i][j]-data_coarse[i][j]);
 			//	buffer[i][j] += irangefn[hipass+0x10000] * hipass ;