Improvement to the raw Auto CA correction, Issue 2128

2013-12-17 11:51:28 +01:00
parent 7f58a8b6a7
commit 33708560ce
2 changed files with 219 additions and 165 deletions
--- a/rtengine/CA_correct_RT.cc
+++ b/rtengine/CA_correct_RT.cc
@@ -30,7 +30,7 @@
 using namespace std;
 using namespace rtengine;
-int RawImageSource::LinEqSolve(int nDim, float* pfMatr, float* pfVect, float* pfSolution) 
+int RawImageSource::LinEqSolve(int nDim, double* pfMatr, double* pfVect, double* pfSolution)
 {
 //==============================================================================
 // return 1 if system not solving, 0 if system solved
@@ -45,8 +45,8 @@ int RawImageSource::LinEqSolve(int nDim, float* pfMatr, float* pfVect, float* pf
 //
 //==============================================================================
-	float fMaxElem;
+	double fMaxElem;
-	float fAcc;
+	double fAcc;
 	int i, j, k, m;
@@ -104,14 +104,14 @@ int RawImageSource::LinEqSolve(int nDim, float* pfMatr, float* pfVect, float* pf
 void RawImageSource::CA_correct_RT(double cared, double cablue) {
 // multithreaded by Ingo Weyrich
-#define TS 256		// Tile size
+#define TS 128		// Tile size
-#define TSH 128		// Half Tile size
+#define TSH 64		// Half Tile size
 #define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} }
 	volatile double progress = 0.0;
 	if(plistener) plistener->setProgress (progress);
-
+	bool autoCA = (cared==0 && cablue==0);
 	// local variables
 	int width=W, height=H;
 	//temporary array to store simple interpolation of G
@@ -132,6 +132,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 	float		(*blockwt);				// vblsz*hblsz
 	float		(*blockshifts)[3][2];	// vblsz*hblsz*3*2
 	const int border=8;
 	const int border2=16;
@@ -150,15 +151,20 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 	blockwt		= (float (*))			(buffer1);
 	blockshifts	= (float (*)[3][2])		(buffer1+(vblsz*hblsz*sizeof(float)));
-	float	polymat[3][2][256], shiftmat[3][2][16], fitparams[3][2][16];
+	double	polymat[3][2][256], shiftmat[3][2][16], fitparams[3][2][16];
 	for (int i=0; i<256; i++) {polymat[0][0][i] = polymat[0][1][i] = polymat[2][0][i] = polymat[2][1][i] = 0;}
 	for (int i=0; i<16; i++) {shiftmat[0][0][i] = shiftmat[0][1][i] = shiftmat[2][0][i] = shiftmat[2][1][i] = 0;}
 #pragma omp parallel shared(Gtmp,width,height,blockave,blocksqave,blockdenom,blockvar,blockwt,blockshifts,polymat,shiftmat,fitparams)
 {
 	//order of 2d polynomial fit (polyord), and numpar=polyord^2
 	int polyord=4, numpar=16;
 	//number of blocks used in the fit
 	int numblox[3]={0,0,0};
 #pragma omp parallel shared(Gtmp,width,height,blockave,blocksqave,blockdenom,blockvar,blockwt,blockshifts,polymat,shiftmat,fitparams,polyord,numpar)
 {
 	int progresscounter = 0;
 	//number of blocks used in the fit
 	int numbloxthr[3]={0,0,0};
 	int rrmin, rrmax, ccmin, ccmax;
 	int top, left, row, col;
 	int rr, cc, c, indx, indx1, i, j, k, m, n, dir;
@@ -258,7 +264,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 	grblpfv		= (float (*))		(data +	6*sizeof(float)*TS*TS + sizeof(float)*TS*TSH + 10*64);
-	if (cared==0 && cablue==0) {
+	if (autoCA) {
 	// Main algorithm: Tile loop
 #pragma omp for collapse(2) schedule(dynamic) nowait
 	for (top=-border ; top < height; top += TS-border2)
@@ -416,13 +422,15 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 					grblpfv[indx>>1] = glpfv + 0.25*(2.0*rgb[c][indx]+rgb[c][indx+v2]+rgb[c][indx-v2]);
 					grblpfh[indx>>1] = glpfh + 0.25*(2.0*rgb[c][indx]+rgb[c][indx+2]+rgb[c][indx-2]);
 				}
 			areawt[0][0]=areawt[1][0]=1;
 			areawt[0][2]=areawt[1][2]=1;
 			// along line segments, find the point along each segment that minimizes the color variance
 			// averaged over the tile; evaluate for up/down and left/right away from R/B grid point
 			for (rr=8; rr < rr1-8; rr++)
 				for (cc=8+(FC(rr,2)&1), indx=rr*TS+cc, c = FC(rr,cc); cc < cc1-8; cc+=2, indx+=2) {
-					areawt[0][c]=areawt[1][c]=0;
+//					areawt[0][c]=areawt[1][c]=0;
 					//in linear interpolation, color differences are a quadratic function of interpolation position;
 					//solve for the interpolation position that minimizes color difference variance over the tile
@@ -436,7 +444,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 					coeff[0][0][c] += gradwt*deltgrb*deltgrb;
 					coeff[0][1][c] += gradwt*gdiff*deltgrb;
 					coeff[0][2][c] += gradwt*gdiff*gdiff;
-					areawt[0][c]+=1;
+//					areawt[0][c]+=1;
 					//horizontal
 					gdiff=0.3125*(rgb[1][indx+1]-rgb[1][indx-1])+0.09375*(rgb[1][indx+1+TS]-rgb[1][indx-1+TS]+rgb[1][indx+1-TS]-rgb[1][indx-1-TS]);
@@ -447,7 +455,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 					coeff[1][0][c] += gradwt*deltgrb*deltgrb;
 					coeff[1][1][c] += gradwt*gdiff*deltgrb;
 					coeff[1][2][c] += gradwt*gdiff*gdiff;
-					areawt[1][c]+=1;
+//					areawt[1][c]+=1;
 					//	In Mathematica,
 					//  f[x_]=Expand[Total[Flatten[
@@ -558,9 +566,12 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				//data structure: blockshifts[blocknum][R/B][v/h]
 				//if (c==0) printf("vblock= %d hblock= %d blockshiftsmedian= %f \n",vblock,hblock,blockshifts[(vblock)*hblsz+hblock][c][0]);
 			}
 			if(plistener) {
-				progress+=(double)((TS-border2)*(TS-border2))/(2*height*width);
+				progresscounter++;
 				if(progresscounter % 8 == 0)
 #pragma omp critical
 				 {
 					progress+=(double)(8.0*(TS-border2)*(TS-border2))/(2*height*width);
 					if (progress>1.0)
 					{
 						progress=1.0;
@@ -569,6 +580,8 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				}
 			}
 		}
 	//end of diagnostic pass
 #pragma omp critical
 {
@@ -593,7 +606,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				break;
 			}
 		}
-	
+}
 	//printf ("tile variances %f %f %f %f \n",blockvar[0][0],blockvar[1][0],blockvar[0][2],blockvar[1][2] );
@@ -602,6 +615,9 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 	//now prepare for CA correction pass
 	//first, fill border blocks of blockshift array
 	if(processpasstwo) {
 #pragma omp sections
 {
 #pragma omp section
 		for (vblock=1; vblock<vblsz-1; vblock++) {//left and right sides
 			for (c=0; c<3; c+=2) {
 				for (i=0; i<2; i++) {
@@ -610,6 +626,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				}
 			}
 		}
 #pragma omp section
 		for (hblock=0; hblock<hblsz; hblock++) {//top and bottom sides
 			for (c=0; c<3; c+=2) {
 				for (i=0; i<2; i++) {
@@ -618,12 +635,17 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				}
 			}
 		}
 }
 		//end of filling border pixels of blockshift array
 #pragma omp barrier
 		//initialize fit arrays
-		for (i=0; i<256; i++) {polymat[0][0][i] = polymat[0][1][i] = polymat[2][0][i] = polymat[2][1][i] = 0;}
+		double	polymatthr[3][2][256], shiftmatthr[3][2][16];
-		for (i=0; i<16; i++) {shiftmat[0][0][i] = shiftmat[0][1][i] = shiftmat[2][0][i] = shiftmat[2][1][i] = 0;}
+		float bstemp[3][2];
-		//#pragma omp for	collapse(2)
+		//initialize fit arrays
 		for (i=0; i<256; i++) {polymatthr[0][0][i] = polymatthr[0][1][i] = polymatthr[2][0][i] = polymatthr[2][1][i] = 0;}
 		for (i=0; i<16; i++) {shiftmatthr[0][0][i] = shiftmatthr[0][1][i] = shiftmatthr[2][0][i] = shiftmatthr[2][1][i] = 0;}
 #pragma omp for nowait	// nowait to allow the first ready thread to start the critical section as soon as possible
 		for (vblock=1; vblock<vblsz-1; vblock++)
 			for (hblock=1; hblock<hblsz-1; hblock++) {
 				// block 3x3 median of blockshifts for robustness
@@ -645,23 +667,24 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 						PIX_SORT(p[3],p[6]); PIX_SORT(p[1],p[4]); PIX_SORT(p[2],p[5]);
 						PIX_SORT(p[4],p[7]); PIX_SORT(p[4],p[2]); PIX_SORT(p[6],p[4]);
 						PIX_SORT(p[4],p[2]);
-						blockshifts[(vblock)*hblsz+hblock][c][dir] = p[4];
+						bstemp[c][dir] = p[4];
 						//if (c==0 && dir==0) printf("vblock= %d hblock= %d blockshiftsmedian= %f \n",vblock,hblock,p[4]);
 					}
 					//if (verbose) fprintf (stderr,_("tile vshift hshift (%d %d %4f %4f)...\n"),vblock, hblock, blockshifts[(vblock)*hblsz+hblock][c][0], blockshifts[(vblock)*hblsz+hblock][c][1]);
 					//now prepare coefficient matrix; use only data points within two std devs of zero
-					if (SQR(blockshifts[(vblock)*hblsz+hblock][c][0])>4.0*blockvar[0][c] || SQR(blockshifts[(vblock)*hblsz+hblock][c][1])>4.0*blockvar[1][c]) continue;
+					if (SQR(bstemp[c][0])>4.0*blockvar[0][c] || SQR(bstemp[c][1])>4.0*blockvar[1][c])
-					numblox[c] += 1;
+						continue;
 					numbloxthr[c]++;
 					for (dir=0; dir<2; dir++) {
 						for (i=0; i<polyord; i++) {
 							for (j=0; j<polyord; j++) {
 								for (m=0; m<polyord; m++)
 									for (n=0; n<polyord; n++) {
-										polymat[c][dir][numpar*(polyord*i+j)+(polyord*m+n)] += (float)pow((float)vblock,i+m)*pow((float)hblock,j+n)*blockwt[vblock*hblsz+hblock];
+										polymatthr[c][dir][numpar*(polyord*i+j)+(polyord*m+n)] += (float)pow((double)vblock,i+m)*pow((double)hblock,j+n)*blockwt[vblock*hblsz+hblock];
 									}
-								shiftmat[c][dir][(polyord*i+j)] += (float)pow((float)vblock,i)*pow((float)hblock,j)*blockshifts[(vblock)*hblsz+hblock][c][dir]*blockwt[vblock*hblsz+hblock];
+								shiftmatthr[c][dir][(polyord*i+j)] += (float)pow((double)vblock,i)*pow((double)hblock,j)*bstemp[c][dir]*blockwt[vblock*hblsz+hblock];
 							}
 							//if (c==0 && dir==0) {printf("i= %d j= %d shiftmat= %f \n",i,j,shiftmat[c][dir][(polyord*i+j)]);}
 						}//monomials
@@ -669,6 +692,29 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				}//c
 			}//blocks
 #pragma omp critical
 {
 	// now sum up the per thread vars
 	for (i=0; i<256; i++) {
 		polymat[0][0][i] += polymatthr[0][0][i];
 		polymat[0][1][i] += polymatthr[0][1][i];
 		polymat[2][0][i] += polymatthr[2][0][i];
 		polymat[2][1][i] += polymatthr[2][1][i];
 	}
 	for (i=0; i<16; i++) {
 		shiftmat[0][0][i] += shiftmatthr[0][0][i];
 		shiftmat[0][1][i] += shiftmatthr[0][1][i];
 		shiftmat[2][0][i] += shiftmatthr[2][0][i];
 		shiftmat[2][1][i] += shiftmatthr[2][1][i];
 	}
 	numblox[0] += numbloxthr[0];
 	numblox[2] += numbloxthr[2];
 }
 #pragma omp barrier
 #pragma omp single
 {
 		numblox[1]=min(numblox[0],numblox[2]);
 		//if too few data points, restrict the order of the fit to linear
@@ -686,7 +732,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) {
 				for (dir=0; dir<2; dir++) {
 					res = LinEqSolve(numpar, polymat[c][dir], shiftmat[c][dir], fitparams[c][dir]);
 					if (res) {
-						printf ("CA correction pass failed -- can't solve linear equations for color %d direction %d...\n",c,dir);
+						printf("CA correction pass failed -- can't solve linear equations for color %d direction %d...\n",c,dir);
 						processpasstwo = false;
 					}
 				}
@@ -813,7 +859,7 @@ if(processpasstwo) {
 			//end of border fill
 			// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-			if (cared || cablue) {
+			if (!autoCA) {
 				//manual CA correction; use red/blue slider values to set CA shift parameters
 				for (rr=3; rr < rr1-3; rr++)
 					for (row=rr+top, cc=3, indx=rr*TS+cc; cc < cc1-3; cc++, indx++) {
@@ -954,7 +1000,11 @@ if(processpasstwo) {
 			}
 			if(plistener) {
-				progress+=(double)((TS-border2)*(TS-border2))/(2*height*width);
+				progresscounter++;
 				if(progresscounter % 8 == 0)
 #pragma omp critical
 				 {
 					progress+=(double)(8.0*(TS-border2)*(TS-border2))/(2*height*width);
 					if (progress>1.0)
 					{
 						progress=1.0;
@@ -963,6 +1013,8 @@ if(processpasstwo) {
 				}
 			}
 		}
 #pragma omp barrier
 // copy temporary image matrix back to image matrix
 #pragma omp for
@@ -980,6 +1032,8 @@ if(processpasstwo) {
 	free(Gtmp);
 	free(buffer1);
 	free(RawDataTmp);
 	if(plistener)
 		plistener->setProgress(1.0);
 #undef TS
 #undef TSH
--- a/rtengine/rawimagesource.h
+++ b/rtengine/rawimagesource.h
@@ -207,7 +207,7 @@ class RawImageSource : public ImageSource {
        inline  void interpolate_row_rb     (float* ar, float* ab, float* pg, float* cg, float* ng, int i);
        inline  void interpolate_row_rb_mul_pp (float* ar, float* ab, float* pg, float* cg, float* ng, int i, double r_mul, double g_mul, double b_mul, int x1, int width, int skip);
-        int  LinEqSolve( int nDim, float* pfMatr, float* pfVect, float* pfSolution);//Emil's CA auto correction
+        int  LinEqSolve( int nDim, double* pfMatr, double* pfVect, double* pfSolution);//Emil's CA auto correction
        void CA_correct_RT	(double cared, double cablue);
        void ddct8x8s(int isgn, float a[8][8]);
        void processRawWhitepoint (float expos, float preser);  // exposure before interpolation