lab2rgb performance optimizations

2011-04-09 13:11:31 +02:00
parent 60a528d845
commit e972e992ba
1 changed files with 47 additions and 35 deletions
--- a/rtengine/iplab2rgb.cc
+++ b/rtengine/iplab2rgb.cc
@@ -21,7 +21,7 @@
 #include <glibmm.h>
 #include <iccstore.h>
 #include <iccmatrices.h>
-
+#include <mytime.h>
 //#include <sRGBgamutbdy.h>
 #ifdef _OPENMP
@@ -56,60 +56,68 @@ const char* wprofnames[] = {"sRGB", "Adobe RGB", "ProPhoto", "WideGamut", "Bruce
 const int numprof = 7;
 void ImProcFunctions::lab2rgb (LabImage* lab, Image8* image) {
-	
+	//MyTime tBeg,tEnd;
 //   tBeg.set();
 	//gamutmap(lab);
 	if (monitorTransform) {
 	    int ix = 0;
 		float g;
        float* buffer = new float [3*lab->W];
        // cmsDoTransform is relatively expensive
-		// Causes problems on some machines
+        #pragma omp parallel for if (multiThread)
        //#pragma omp parallel for if (multiThread)
 		for (int i=0; i<lab->H; i++) {
            float buffer[3*lab->W];
            const int ix = i * 3 * lab->W;
            int iy = 0;
 			float* rL = lab->L[i];
 			float* ra = lab->a[i];
 			float* rb = lab->b[i];
-			int iy = 0;
+
 			float R,G,B;
            float fy,fx,fz,x_,y_,z_;
 			for (int j=0; j<lab->W; j++) {
-				float fy = (0.00862069 * rL[j])/327.68 + 0.137932; // (L+16)/116
+				fy = (0.00862069 * rL[j]) / 327.68 + 0.137932; // (L+16)/116
-				float fx = (0.002 * ra[j])/327.68 + fy;
+				fx = (0.002 * ra[j]) / 327.68 + fy;
-				float fz = fy - (0.005 * rb[j])/327.68;
+				fz = fy - (0.005 * rb[j]) / 327.68;
-				float x_ = Lab2xyz(fx)*D50x;//should this be 32767???  buffer is short int !!!
+				x_ = Lab2xyz(fx)*D50x;//should this be 32767???  buffer is short int !!!
-				float y_ = Lab2xyz(fy);
+				y_ = Lab2xyz(fy);
-				float z_ = Lab2xyz(fz)*D50z;
+				z_ = Lab2xyz(fz)*D50z;
                buffer[iy++] = CLIP01(x_);
                buffer[iy++] = CLIP01(y_);
                buffer[iy++] = CLIP01(z_);
 			}
            cmsDoTransform (monitorTransform, buffer, image->data + ix, lab->W);
            ix += 3*lab->W;
 		}
-        delete [] buffer;
+        
 	} else {
 		#pragma omp parallel for if (multiThread)
 		for (int i=0; i<lab->H; i++) {
 			float* rL = lab->L[i];
 			float* ra = lab->a[i];
 			float* rb = lab->b[i];
-			int ix = 3*i*lab->W;
+			int ix = i * 3 * lab->W;
-			for (int j=0; j<lab->W; j++) {
+
 			float g;
 			float R,G,B;
            float fy,fx,fz,x_,y_,z_;
 			for (int j=0; j<lab->W; j++) {
 				//float L1=rL[j],a1=ra[j],b1=rb[j];//for testing
-				float fy = (0.00862069 * rL[j])/327.68 + 0.137932; // (L+16)/116
+				fy = (0.00862069 * rL[j]) / 327.68 + 0.137932; // (L+16)/116
-				float fx = (0.002 * ra[j])/327.68 + fy;
+				fx = (0.002 * ra[j]) / 327.68 + fy;
-				float fz = fy - (0.005 * rb[j])/327.68;
+				fz = fy - (0.005 * rb[j]) / 327.68;
-				float x_ = 65535*Lab2xyz(fx)*D50x;
+				x_ = 65535*Lab2xyz(fx)*D50x;
-				float y_ = 65535*Lab2xyz(fy);
+				y_ = 65535*Lab2xyz(fy);
-				float z_ = 65535*Lab2xyz(fz)*D50z;
+				z_ = 65535*Lab2xyz(fz)*D50z;
 				xyz2srgb(x_,y_,z_,R,G,B);
@@ -120,6 +128,9 @@ void ImProcFunctions::lab2rgb (LabImage* lab, Image8* image) {
 			}
 		}
 	}
    //tEnd.set();
    //printf("lab2rgb %i %d\n", lab->W, tEnd.etime(tBeg));
 }
 Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile) {
@@ -141,18 +152,19 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch,
        cmsHTRANSFORM hTransform = cmsCreateTransform (iprof, TYPE_RGB_16, oprof, TYPE_RGB_8, settings->colorimetricIntent,
            cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE );  // NOCACHE is important for thread safety
        lcmsMutex->unlock ();
        int ix = 0;
 		float g;
        short* buffer = new short [3*cw];
        // cmsDoTransform is relatively expensive
-        // Causes problems on some machines
+		#pragma omp parallel for if (multiThread)
 		//#pragma omp parallel for if (multiThread)
        for (int i=cy; i<cy+ch; i++) {
            short buffer [3*cw];
            const int ix = i * 3 * cw;
            int iy = 0;
            float* rL = lab->L[i];
            float* ra = lab->a[i];
            float* rb = lab->b[i];
-            int iy = 0;
+
            for (int j=cx; j<cx+cw; j++) {
 				float fy = (0.00862069 * rL[j])/327.68 + 0.137932; // (L+16)/116
@@ -167,10 +179,10 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch,
                buffer[iy++] = CLIP((int)y_);
                buffer[iy++] = CLIP((int)z_);
            }
            cmsDoTransform (hTransform, buffer, image->data + ix, cw);
            ix += 3*cw;
        }
-        delete [] buffer;
+
        cmsDeleteTransform(hTransform);
    } else {