From f7b0dc429c3c69cf8748827ab540de22bc0351a2 Mon Sep 17 00:00:00 2001 From: Ingo Date: Fri, 12 Jul 2013 16:28:46 +0200 Subject: [PATCH] Optimization for Chromatic Abberation Correction, Issue 1923 --- rtengine/CA_correct_RT.cc | 624 ++++++++++++++++++++------------------ 1 file changed, 337 insertions(+), 287 deletions(-) diff --git a/rtengine/CA_correct_RT.cc b/rtengine/CA_correct_RT.cc index f144bca57..8e3fbcdd9 100644 --- a/rtengine/CA_correct_RT.cc +++ b/rtengine/CA_correct_RT.cc @@ -52,10 +52,10 @@ int RawImageSource::LinEqSolve(int nDim, float* pfMatr, float* pfVect, float* pf for(k=0; k<(nDim-1); k++) {// base row of matrix // search of line with max element - fMaxElem = fabs( pfMatr[k*nDim + k] ); + fMaxElem = fabsf( pfMatr[k*nDim + k] ); m = k; for (i=k+1; i(b)) {temp=(a);(a)=(b);(b)=temp;} } + + volatile double progress = 0.0; + if(plistener) plistener->setProgress (progress); - #define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } // local variables int width=W, height=H; //temporary array to store simple interpolation of G float (*Gtmp); Gtmp = (float (*)) calloc ((height)*(width), sizeof *Gtmp); - + + // temporary array to avoid race conflicts, only every second pixel needs to be saved here + float (*RawDataTmp); + RawDataTmp = (float*) malloc( height * width * sizeof(float)/2); + + float blockave[2][3]={{0,0,0},{0,0,0}}, blocksqave[2][3]={{0,0,0},{0,0,0}}, blockdenom[2][3]={{0,0,0},{0,0,0}}, blockvar[2][3]; + + // Because we can't break parallel processing, we need a switch do handle the errors + bool processpasstwo = true; + + //block CA shift values and weight assigned to block + char *buffer1; // vblsz*hblsz*(3*2+1) + float (*blockwt); // vblsz*hblsz + float (*blockshifts)[3][2]; // vblsz*hblsz*3*2 + const int border=8; const int border2=16; + + int vz1, hz1; + if((height+border2)%(TS-border2)==0) vz1=1; else vz1=0; + if((width+border2)%(TS-border2)==0) hz1=1; else hz1=0; + + int vblsz, hblsz; + vblsz=ceil((float)(height+border2)/(TS-border2)+2+vz1); + hblsz=ceil((float)(width+border2)/(TS-border2)+2+hz1); + + buffer1 = (char *) malloc(vblsz*hblsz*(3*2+1)*sizeof(float)); + //merror(buffer1,"CA_correct()"); + memset(buffer1,0,vblsz*hblsz*(3*2+1)*sizeof(float)); + // block CA shifts + blockwt = (float (*)) (buffer1); + blockshifts = (float (*)[3][2]) (buffer1+(vblsz*hblsz*sizeof(float))); + + float polymat[3][2][256], shiftmat[3][2][16], fitparams[3][2][16]; + +#pragma omp parallel shared(Gtmp,width,height,blockave,blocksqave,blockdenom,blockvar,blockwt,blockshifts,polymat,shiftmat,fitparams) +{ //order of 2d polynomial fit (polyord), and numpar=polyord^2 int polyord=4, numpar=16; //number of blocks used in the fit @@ -134,14 +170,14 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { int offset[2][3]; int shifthfloor[3], shiftvfloor[3], shifthceil[3], shiftvceil[3]; //number of tiles in the image - int vblsz, hblsz, vblock, hblock, vz1, hz1; + int vblock, hblock; //int verbose=1; //flag indicating success or failure of polynomial fit int res; //shifts to location of vertical and diagonal neighbors - const int v1=TS, v2=2*TS, /* v3=3*TS,*/ v4=4*TS;//, p1=-TS+1, p2=-2*TS+2, p3=-3*TS+3, m1=TS+1, m2=2*TS+2, m3=3*TS+3; + const int v1=TS, v2=2*TS, v3=3*TS, v4=4*TS;//, p1=-TS+1, p2=-2*TS+2, p3=-3*TS+3, m1=TS+1, m2=2*TS+2, m3=3*TS+3; - float eps=1e-5, eps2=1e-10; //tolerance to avoid dividing by zero + float eps=1e-5f, eps2=1e-10f; //tolerance to avoid dividing by zero //adaptive weights for green interpolation float wtu, wtd, wtl, wtr; @@ -150,7 +186,6 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { //measured CA shift parameters for a tile float CAshift[2][3]; //polynomial fit coefficients - float polymat[3][2][256], shiftmat[3][2][16], fitparams[3][2][16]; //residual CA shift amount within a plaquette float shifthfrac[3], shiftvfrac[3]; //temporary storage for median filter @@ -161,8 +196,9 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { float Ginthfloor, Ginthceil, Gint, RBint, gradwt; //interpolated color difference at edge of plaquette float grbdiffinthfloor, grbdiffinthceil, grbdiffint, grbdiffold; - //data for evaluation of block CA shift variance - float blockave[2][3]={{0,0,0},{0,0,0}}, blocksqave[2][3]={{0,0,0},{0,0,0}}, blockdenom[2][3]={{0,0,0},{0,0,0}}, blockvar[2][3]; + //per thread data for evaluation of block CA shift variance + float blockavethr[2][3]={{0,0,0},{0,0,0}}, blocksqavethr[2][3]={{0,0,0},{0,0,0}}, blockdenomthr[2][3]={{0,0,0},{0,0,0}};//, blockvarthr[2][3]; + //low and high pass 1D filters of G in vertical/horizontal directions float glpfh, glpfv; @@ -176,7 +212,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { char *buffer; // TS*TS*16 //rgb data in a tile - float (*rgb)[3]; // TS*TS*12 + float* rgb[3]; //color differences float (*grbdiff); // TS*TS*4 //green interpolated to optical sample points for R/B @@ -197,64 +233,51 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { /* assign working space; this would not be necessary if the algorithm is part of the larger pre-interpolation processing */ - buffer = (char *) malloc(11*sizeof(float)*TS*TS); + buffer = (char *) malloc(3*sizeof(float)*TS*TS + 8*sizeof(float)*TS*TSH + 10*64 + 64); //merror(buffer,"CA_correct()"); - memset(buffer,0,11*sizeof(float)*TS*TS); + memset(buffer,0,3*sizeof(float)*TS*TS + 8*sizeof(float)*TS*TSH + 10*64 + 64); - // rgb array - rgb = (float (*)[3]) buffer; - grbdiff = (float (*)) (buffer + 3*sizeof(float)*TS*TS); - gshift = (float (*)) (buffer + 4*sizeof(float)*TS*TS); - rbhpfh = (float (*)) (buffer + 5*sizeof(float)*TS*TS); - rbhpfv = (float (*)) (buffer + 6*sizeof(float)*TS*TS); - rblpfh = (float (*)) (buffer + 7*sizeof(float)*TS*TS); - rblpfv = (float (*)) (buffer + 8*sizeof(float)*TS*TS); - grblpfh = (float (*)) (buffer + 9*sizeof(float)*TS*TS); - grblpfv = (float (*)) (buffer + 10*sizeof(float)*TS*TS); + char *data; + data = buffer; +// buffers aligned to size of cacheline +// data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64); - if((height+border2)%(TS-border2)==0) vz1=1; else vz1=0; - if((width+border2)%(TS-border2)==0) hz1=1; else hz1=0; - - vblsz=ceil((float)(height+border2)/(TS-border2)+2+vz1); - hblsz=ceil((float)(width+border2)/(TS-border2)+2+hz1); - //block CA shift values and weight assigned to block - char *buffer1; // vblsz*hblsz*(3*2+1) - float (*blockwt); // vblsz*hblsz - float (*blockshifts)[3][2]; // vblsz*hblsz*3*2 - //float blockshifts[1000][3][2]; //fixed memory allocation - //float blockwt[1000]; //fixed memory allocation - - buffer1 = (char *) malloc(vblsz*hblsz*(3*2+1)*sizeof(float)); - //merror(buffer1,"CA_correct()"); - memset(buffer1,0,vblsz*hblsz*(3*2+1)*sizeof(float)); - // block CA shifts - blockwt = (float (*)) (buffer1); - blockshifts = (float (*)[3][2]) (buffer1+(vblsz*hblsz*sizeof(float))); - - int vctr=0, hctr=0; + // shift the beginning of all arrays but the first by 64 bytes to avoid cache miss conflicts on CPUs which have <=4-way associative L1-Cache + rgb[0] = (float (*)) data; + rgb[1] = (float (*)) (data + 1*sizeof(float)*TS*TS + 1*64); + rgb[2] = (float (*)) (data + 2*sizeof(float)*TS*TS + 2*64); + grbdiff = (float (*)) (data + 3*sizeof(float)*TS*TS + 3*64); + gshift = (float (*)) (data + 3*sizeof(float)*TS*TS + sizeof(float)*TS*TSH + 4*64); + rbhpfh = (float (*)) (data + 4*sizeof(float)*TS*TS + 5*64); + rbhpfv = (float (*)) (data + 4*sizeof(float)*TS*TS + sizeof(float)*TS*TSH + 6*64); + rblpfh = (float (*)) (data + 5*sizeof(float)*TS*TS + 7*64); + rblpfv = (float (*)) (data + 5*sizeof(float)*TS*TS + sizeof(float)*TS*TSH + 8*64); + grblpfh = (float (*)) (data + 6*sizeof(float)*TS*TS + 9*64); + grblpfv = (float (*)) (data + 6*sizeof(float)*TS*TS + sizeof(float)*TS*TSH + 10*64); + if (cared==0 && cablue==0) { // Main algorithm: Tile loop - //#pragma omp parallel for shared(image,height,width) private(top,left,indx,indx1) schedule(dynamic) - for (top=-border, vblock=1; top < height; top += TS-border2, vblock++) { - hctr=0; - vctr++; - for (left=-border, hblock=1; left < width; left += TS-border2, hblock++) { - hctr++; +#pragma omp for collapse(2) schedule(dynamic) nowait + for (top=-border ; top < height; top += TS-border2) + for (left=-border; left < width; left += TS-border2) { + vblock = ((top+border)/(TS-border2))+1; + hblock = ((left+border)/(TS-border2))+1; int bottom = min(top+TS,height+border); int right = min(left+TS, width+border); int rr1 = bottom - top; int cc1 = right - left; //t1_init = clock(); - // rgb from input CFA data - // rgb values should be floating point number between 0 and 1 - // after white balance multipliers are applied if (top<0) {rrmin=border;} else {rrmin=0;} if (left<0) {ccmin=border;} else {ccmin=0;} if (bottom>height) {rrmax=height-top;} else {rrmax=rr1;} if (right>width) {ccmax=width-left;} else {ccmax=cc1;} + + // rgb from input CFA data + // rgb values should be floating point number between 0 and 1 + // after white balance multipliers are applied for (rr=rrmin; rr < rrmax; rr++) for (row=rr+top, cc=ccmin; cc < ccmax; cc++) { @@ -262,7 +285,7 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { c = FC(rr,cc); indx=row*width+col; indx1=rr*TS+cc; - rgb[indx1][c] = (rawData[row][col])/65535.0f; + rgb[c][indx1] = (rawData[row][col])/65535.0f; //rgb[indx1][c] = image[indx][c]/65535.0f;//for dcraw implementation } @@ -272,14 +295,14 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { for (rr=0; rr-1 && row-1 && col>1] = fabsf(fabsf((rgb[1][indx]-rgb[c][indx])-(rgb[1][indx+v4]-rgb[c][indx+v4])) + + fabsf((rgb[1][indx-v4]-rgb[c][indx-v4])-(rgb[1][indx]-rgb[c][indx])) - + fabsf((rgb[1][indx-v4]-rgb[c][indx-v4])-(rgb[1][indx+v4]-rgb[c][indx+v4]))); + rbhpfh[indx>>1] = fabsf(fabsf((rgb[1][indx]-rgb[c][indx])-(rgb[1][indx+4]-rgb[c][indx+4])) + + fabsf((rgb[1][indx-4]-rgb[c][indx-4])-(rgb[1][indx]-rgb[c][indx])) - + fabsf((rgb[1][indx-4]-rgb[c][indx-4])-(rgb[1][indx+4]-rgb[c][indx+4]))); - /*ghpfv = fabs(fabs(rgb[indx][1]-rgb[indx+v4][1])+fabs(rgb[indx][1]-rgb[indx-v4][1]) - - fabs(rgb[indx+v4][1]-rgb[indx-v4][1])); - ghpfh = fabs(fabs(rgb[indx][1]-rgb[indx+4][1])+fabs(rgb[indx][1]-rgb[indx-4][1]) - - fabs(rgb[indx+4][1]-rgb[indx-4][1])); - rbhpfv[indx] = fabs(ghpfv - fabs(fabs(rgb[indx][c]-rgb[indx+v4][c])+fabs(rgb[indx][c]-rgb[indx-v4][c]) - - fabs(rgb[indx+v4][c]-rgb[indx-v4][c]))); - rbhpfh[indx] = fabs(ghpfh - fabs(fabs(rgb[indx][c]-rgb[indx+4][c])+fabs(rgb[indx][c]-rgb[indx-4][c]) - - fabs(rgb[indx+4][c]-rgb[indx-4][c])));*/ + /*ghpfv = fabsf(fabsf(rgb[indx][1]-rgb[indx+v4][1])+fabsf(rgb[indx][1]-rgb[indx-v4][1]) - + fabsf(rgb[indx+v4][1]-rgb[indx-v4][1])); + ghpfh = fabsf(fabsf(rgb[indx][1]-rgb[indx+4][1])+fabsf(rgb[indx][1]-rgb[indx-4][1]) - + fabsf(rgb[indx+4][1]-rgb[indx-4][1])); + rbhpfv[indx] = fabsf(ghpfv - fabsf(fabsf(rgb[indx][c]-rgb[indx+v4][c])+fabsf(rgb[indx][c]-rgb[indx-v4][c]) - + fabsf(rgb[indx+v4][c]-rgb[indx-v4][c]))); + rbhpfh[indx] = fabsf(ghpfh - fabsf(fabsf(rgb[indx][c]-rgb[indx+4][c])+fabsf(rgb[indx][c]-rgb[indx-4][c]) - + fabsf(rgb[indx+4][c]-rgb[indx-4][c])));*/ - glpfv = 0.25*(2*rgb[indx][1]+rgb[indx+v2][1]+rgb[indx-v2][1]); - glpfh = 0.25*(2*rgb[indx][1]+rgb[indx+2][1]+rgb[indx-2][1]); - rblpfv[indx] = eps+fabs(glpfv - 0.25*(2*rgb[indx][c]+rgb[indx+v2][c]+rgb[indx-v2][c])); - rblpfh[indx] = eps+fabs(glpfh - 0.25*(2*rgb[indx][c]+rgb[indx+2][c]+rgb[indx-2][c])); - grblpfv[indx] = glpfv + 0.25*(2*rgb[indx][c]+rgb[indx+v2][c]+rgb[indx-v2][c]); - grblpfh[indx] = glpfh + 0.25*(2*rgb[indx][c]+rgb[indx+2][c]+rgb[indx-2][c]); + glpfv = 0.25*(2.0*rgb[1][indx]+rgb[1][indx+v2]+rgb[1][indx-v2]); + glpfh = 0.25*(2.0*rgb[1][indx]+rgb[1][indx+2]+rgb[1][indx-2]); + rblpfv[indx>>1] = eps+fabsf(glpfv - 0.25*(2.0*rgb[c][indx]+rgb[c][indx+v2]+rgb[c][indx-v2])); + rblpfh[indx>>1] = eps+fabsf(glpfh - 0.25*(2.0*rgb[c][indx]+rgb[c][indx+2]+rgb[c][indx-2])); + grblpfv[indx>>1] = glpfv + 0.25*(2.0*rgb[c][indx]+rgb[c][indx+v2]+rgb[c][indx-v2]); + grblpfh[indx>>1] = glpfh + 0.25*(2.0*rgb[c][indx]+rgb[c][indx+2]+rgb[c][indx-2]); } // along line segments, find the point along each segment that minimizes the color variance // averaged over the tile; evaluate for up/down and left/right away from R/B grid point for (rr=8; rr < rr1-8; rr++) for (cc=8+(FC(rr,2)&1), indx=rr*TS+cc, c = FC(rr,cc); cc < cc1-8; cc+=2, indx+=2) { - + areawt[0][c]=areawt[1][c]=0; - + //in linear interpolation, color differences are a quadratic function of interpolation position; //solve for the interpolation position that minimizes color difference variance over the tile - + //vertical - gdiff=0.3125*(rgb[indx+TS][1]-rgb[indx-TS][1])+0.09375*(rgb[indx+TS+1][1]-rgb[indx-TS+1][1]+rgb[indx+TS-1][1]-rgb[indx-TS-1][1]); - deltgrb=(rgb[indx][c]-rgb[indx][1]); - - gradwt=fabs(0.25*rbhpfv[indx]+0.125*(rbhpfv[indx+2]+rbhpfv[indx-2]) )*(grblpfv[indx-v2]+grblpfv[indx+v2])/(eps+0.1*grblpfv[indx-v2]+rblpfv[indx-v2]+0.1*grblpfv[indx+v2]+rblpfv[indx+v2]); - + gdiff=0.3125*(rgb[1][indx+TS]-rgb[1][indx-TS])+0.09375*(rgb[1][indx+TS+1]-rgb[1][indx-TS+1]+rgb[1][indx+TS-1]-rgb[1][indx-TS-1]); + deltgrb=(rgb[c][indx]-rgb[1][indx]); + + gradwt=fabsf(0.25*rbhpfv[indx>>1]+0.125*(rbhpfv[(indx>>1)+1]+rbhpfv[(indx>>1)-1]) )*(grblpfv[(indx>>1)-v1]+grblpfv[(indx>>1)+v1])/(eps+0.1*grblpfv[(indx>>1)-v1]+rblpfv[(indx>>1)-v1]+0.1*grblpfv[(indx>>1)+v1]+rblpfv[(indx>>1)+v1]); + coeff[0][0][c] += gradwt*deltgrb*deltgrb; coeff[0][1][c] += gradwt*gdiff*deltgrb; coeff[0][2][c] += gradwt*gdiff*gdiff; areawt[0][c]+=1; - - + //horizontal - gdiff=0.3125*(rgb[indx+1][1]-rgb[indx-1][1])+0.09375*(rgb[indx+1+TS][1]-rgb[indx-1+TS][1]+rgb[indx+1-TS][1]-rgb[indx-1-TS][1]); - deltgrb=(rgb[indx][c]-rgb[indx][1]); - - gradwt=fabs(0.25*rbhpfh[indx]+0.125*(rbhpfh[indx+v2]+rbhpfh[indx-v2]) )*(grblpfh[indx-2]+grblpfh[indx+2])/(eps+0.1*grblpfh[indx-2]+rblpfh[indx-2]+0.1*grblpfh[indx+2]+rblpfh[indx+2]); - + gdiff=0.3125*(rgb[1][indx+1]-rgb[1][indx-1])+0.09375*(rgb[1][indx+1+TS]-rgb[1][indx-1+TS]+rgb[1][indx+1-TS]-rgb[1][indx-1-TS]); + deltgrb=(rgb[c][indx]-rgb[1][indx]); + + gradwt=fabsf(0.25*rbhpfh[indx>>1]+0.125*(rbhpfh[(indx>>1)+v1]+rbhpfh[(indx>>1)-v1]) )*(grblpfh[(indx>>1)-1]+grblpfh[(indx>>1)+1])/(eps+0.1*grblpfh[(indx>>1)-1]+rblpfh[(indx>>1)-1]+0.1*grblpfh[(indx>>1)+1]+rblpfh[(indx>>1)+1]); + coeff[1][0][c] += gradwt*deltgrb*deltgrb; coeff[1][1][c] += gradwt*gdiff*deltgrb; coeff[1][2][c] += gradwt*gdiff*gdiff; areawt[1][c]+=1; - - + // In Mathematica, // f[x_]=Expand[Total[Flatten[ // ((1-x) RotateLeft[Gint,shift1]+x RotateLeft[Gint,shift2]-cfapad)^2[[dv;;-1;;2,dh;;-1;;2]]]]]; @@ -515,21 +536,17 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { //data structure = CAshift[vert/hor][color] //j=0=vert, 1=hor - - - offset[j][c]=floor(CAshift[j][c]); + //offset gives NW corner of square containing the min; j=0=vert, 1=hor - if (fabs(CAshift[j][c])<2.0) { - blockave[j][c] += CAshift[j][c]; - blocksqave[j][c] += SQR(CAshift[j][c]); - blockdenom[j][c] += 1; + if (fabsf(CAshift[j][c])<2.0f) { + blockavethr[j][c] += CAshift[j][c]; + blocksqavethr[j][c] += SQR(CAshift[j][c]); + blockdenomthr[j][c] += 1; } }//vert/hor }//color - - /* CAshift[j][c] are the locations that minimize color difference variances; This is the approximate _optical_ location of the R/B pixels */ @@ -542,20 +559,38 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { //if (c==0) printf("vblock= %d hblock= %d blockshiftsmedian= %f \n",vblock,hblock,blockshifts[(vblock)*hblsz+hblock][c][0]); } - if(plistener) plistener->setProgress(0.5*fabs((float)top/height)); - + if(plistener) { + progress+=(double)((TS-border2)*(TS-border2))/(2*height*width); + if (progress>1.0) + { + progress=1.0; + } + plistener->setProgress(progress); + } } - } + //end of diagnostic pass - +#pragma omp critical +{ + for (j=0; j<2; j++) + for (c=0; c<3; c+=2) { + blockdenom[j][c] += blockdenomthr[j][c]; + blocksqave[j][c] += blocksqavethr[j][c]; + blockave[j][c] += blockavethr[j][c]; + } +} +#pragma omp barrier + +#pragma omp single +{ for (j=0; j<2; j++) for (c=0; c<3; c+=2) { if (blockdenom[j][c]) { blockvar[j][c] = blocksqave[j][c]/blockdenom[j][c]-SQR(blockave[j][c]/blockdenom[j][c]); } else { + processpasstwo = false; printf ("blockdenom vanishes \n"); - free(buffer1); - return; + break; } } @@ -566,120 +601,122 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { //now prepare for CA correction pass //first, fill border blocks of blockshift array - for (vblock=1; vblock4.0*blockvar[0][c] || SQR(blockshifts[(vblock)*hblsz+hblock][c][1])>4.0*blockvar[1][c]) continue; - numblox[c] += 1; - for (dir=0; dir<2; dir++) { - for (i=0; i4.0*blockvar[0][c] || SQR(blockshifts[(vblock)*hblsz+hblock][c][1])>4.0*blockvar[1][c]) continue; + numblox[c] += 1; + for (dir=0; dir<2; dir++) { + for (i=0; iheight) {rrmax=height-top;} else {rrmax=rr1;} if (right>width) {ccmax=width-left;} else {ccmax=cc1;} - - + + // rgb from input CFA data + // rgb values should be floating point number between 0 and 1 + // after white balance multipliers are applied + for (rr=rrmin; rr < rrmax; rr++) for (row=rr+top, cc=ccmin; cc < ccmax; cc++) { col = cc+left; @@ -687,10 +724,10 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { indx=row*width+col; indx1=rr*TS+cc; //rgb[indx1][c] = image[indx][c]/65535.0f; - rgb[indx1][c] = (rawData[row][col])/65535.0f; + rgb[c][indx1] = (rawData[row][col])/65535.0f; //rgb[indx1][c] = image[indx][c]/65535.0f;//for dcraw implementation - if ((c&1)==0) rgb[indx1][1] = Gtmp[indx]; + if ((c&1)==0) rgb[1][indx1] = Gtmp[indx]; } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders @@ -698,36 +735,36 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { for (rr=0; rr0) { for (rr=rrmin; rr0 && ccmax0) { for (rr=0; rr-1 && row-1 && col>1]=Gint-rgb[c][(rr)*TS+cc]; + gshift[((rr)*TS+cc)>>1]=Gint; } for (rr=8; rr < rr1-8; rr++) @@ -868,72 +905,85 @@ void RawImageSource::CA_correct_RT(double cared, double cablue) { //if (rgb[indx][c]>clip_pt || Gtmp[indx]>clip_pt) continue; - grbdiffold = rgb[indx][1]-rgb[indx][c]; + grbdiffold = rgb[1][indx]-rgb[c][indx]; //interpolate color difference from optical R/B locations to grid locations - grbdiffinthfloor=(1-shifthfrac[c]/2)*grbdiff[indx]+(shifthfrac[c]/2)*grbdiff[indx-2*GRBdir[1][c]]; - grbdiffinthceil=(1-shifthfrac[c]/2)*grbdiff[(rr-2*GRBdir[0][c])*TS+cc]+(shifthfrac[c]/2)*grbdiff[(rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c]]; + grbdiffinthfloor=(1.0f-shifthfrac[c]/2.0f)*grbdiff[indx>>1]+(shifthfrac[c]/2.0f)*grbdiff[(indx-2*GRBdir[1][c])>>1]; + grbdiffinthceil=(1.0f-shifthfrac[c]/2.0f)*grbdiff[((rr-2*GRBdir[0][c])*TS+cc)>>1]+(shifthfrac[c]/2.0f)*grbdiff[((rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c])>>1]; //grbdiffint is bilinear interpolation of G-R/G-B at grid point - grbdiffint=(1-shiftvfrac[c]/2)*grbdiffinthfloor+(shiftvfrac[c]/2)*grbdiffinthceil; + grbdiffint=(1.0f-shiftvfrac[c]/2.0f)*grbdiffinthfloor+(shiftvfrac[c]/2.0f)*grbdiffinthceil; //now determine R/B at grid points using interpolated color differences and interpolated G value at grid point - RBint=rgb[indx][1]-grbdiffint; + RBint=rgb[1][indx]-grbdiffint; - if (fabs(RBint-rgb[indx][c])<0.25*(RBint+rgb[indx][c])) { - if (fabs(grbdiffold)>fabs(grbdiffint) ) { - rgb[indx][c]=RBint; + if (fabsf(RBint-rgb[c][indx])<0.25f*(RBint+rgb[c][indx])) { + if (fabsf(grbdiffold)>fabsf(grbdiffint) ) { + rgb[c][indx]=RBint; } } else { //gradient weights using difference from G at CA shift points and G at grid points - p[0]=1/(eps+fabs(rgb[indx][1]-gshift[indx])); - p[1]=1/(eps+fabs(rgb[indx][1]-gshift[indx-2*GRBdir[1][c]])); - p[2]=1/(eps+fabs(rgb[indx][1]-gshift[(rr-2*GRBdir[0][c])*TS+cc])); - p[3]=1/(eps+fabs(rgb[indx][1]-gshift[(rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c]])); + p[0]=1.0f/(eps+fabsf(rgb[1][indx]-gshift[indx>>1])); + p[1]=1.0f/(eps+fabsf(rgb[1][indx]-gshift[(indx-2*GRBdir[1][c])>>1])); + p[2]=1.0f/(eps+fabsf(rgb[1][indx]-gshift[((rr-2*GRBdir[0][c])*TS+cc)>>1])); + p[3]=1.0f/(eps+fabsf(rgb[1][indx]-gshift[((rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c])>>1])); - grbdiffint = (p[0]*grbdiff[indx]+p[1]*grbdiff[indx-2*GRBdir[1][c]]+ - p[2]*grbdiff[(rr-2*GRBdir[0][c])*TS+cc]+p[3]*grbdiff[(rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c]])/(p[0]+p[1]+p[2]+p[3]); + grbdiffint = (p[0]*grbdiff[indx>>1]+p[1]*grbdiff[(indx-2*GRBdir[1][c])>>1]+ + p[2]*grbdiff[((rr-2*GRBdir[0][c])*TS+cc)>>1]+p[3]*grbdiff[((rr-2*GRBdir[0][c])*TS+cc-2*GRBdir[1][c])>>1])/(p[0]+p[1]+p[2]+p[3]); //now determine R/B at grid points using interpolated color differences and interpolated G value at grid point - if (fabs(grbdiffold)>fabs(grbdiffint) ) { - rgb[indx][c]=rgb[indx][1]-grbdiffint; + if (fabsf(grbdiffold)>fabsf(grbdiffint) ) { + rgb[c][indx]=rgb[1][indx]-grbdiffint; } } //if color difference interpolation overshot the correction, just desaturate if (grbdiffold*grbdiffint<0) { - rgb[indx][c]=rgb[indx][1]-0.5*(grbdiffold+grbdiffint); + rgb[c][indx]=rgb[1][indx]-0.5f*(grbdiffold+grbdiffint); } } - // copy CA corrected results back to image matrix - for (rr=border; rr < rr1-border; rr++) - for (row=rr+top, cc=border+(FC(rr,2)&1); cc < cc1-border; cc+=2) { + // copy CA corrected results to temporary image matrix + for (rr=border; rr < rr1-border; rr++){ + c = FC(rr+top, left + border+FC(rr+top,2)&1); + for (row=rr+top, cc=border+(FC(rr,2)&1),indx=(row*width+cc+left)>>1; cc < cc1-border; cc+=2,indx++) { col = cc + left; - indx = row*width + col; - c = FC(row,col); - - rawData[row][col] = 65535.0f*rgb[(rr)*TS+cc][c] + 0.5f; + RawDataTmp[indx] = 65535.0f*rgb[c][(rr)*TS+cc] + 0.5f; //image[indx][c] = CLIP((int)(65535.0*rgb[(rr)*TS+cc][c] + 0.5));//for dcraw implementation + } + } - } - - if(plistener) plistener->setProgress(0.5+0.5*fabs((float)top/height)); - + if(plistener) { + progress+=(double)((TS-border2)*(TS-border2))/(2*height*width); + if (progress>1.0) + { + progress=1.0; + } + plistener->setProgress(progress); + } } + } + free(buffer); + +#pragma omp barrier + +// copy temporary image matrix back to image matrix +#pragma omp for + for(row=0;row>1;col