diff --git a/rtengine/amaze_demosaic_RT.cc b/rtengine/amaze_demosaic_RT.cc index 5c4f3f7a8..8f022b4f7 100644 --- a/rtengine/amaze_demosaic_RT.cc +++ b/rtengine/amaze_demosaic_RT.cc @@ -30,17 +30,17 @@ using namespace rtengine; -void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { +void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { #define HCLIP(x) x //is this still necessary??? //min(clip_pt,x) int width=winw, height=winh; - - + + const float clip_pt = 1/initialGain; #define TS 512 // Tile size; the image is processed in square tiles to lower memory requirements and facilitate multi-threading - + // local variables @@ -69,7 +69,15 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { static const float gquinc[4] = {0.169917f, 0.108947f, 0.069855f, 0.0287182f}; volatile double progress = 0.0; - // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +// Issue 1676 +// Moved from inside the parallel section + if (plistener) { + plistener->setProgressStr ("AMaZE Demosaicing..."); + plistener->setProgress (0.0); + } + #pragma omp parallel { //position of top/left corner of the tile @@ -159,7 +167,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { delm = (float (*)) (buffer + 18*sizeof(float)*TS*TS); rbint = (float (*)) (buffer + 19*sizeof(float)*TS*TS); Dgrbh2 = (float (*)) (buffer + 20*sizeof(float)*TS*TS); - Dgrbv2 = (float (*)) (buffer + 21*sizeof(float)*TS*TS); + Dgrbv2 = (float (*)) (buffer + 21*sizeof(float)*TS*TS); dgintv = (float (*)) (buffer + 22*sizeof(float)*TS*TS); dginth = (float (*)) (buffer + 23*sizeof(float)*TS*TS); Dgrbp1 = (float (*)) (buffer + 24*sizeof(float)*TS*TS); @@ -194,10 +202,6 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - if (plistener) { - plistener->setProgressStr ("AMaZE Demosaicing..."); - plistener->setProgress (0.0); - } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -211,8 +215,11 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { // Main algorithm: Tile loop //#pragma omp parallel for shared(rawData,height,width,red,green,blue) private(top,left) schedule(dynamic) - //code is openmp ready; just have to pull local tile variable declarations inside the tile loop -#pragma omp for schedule(dynamic) nowait + //code is openmp ready; just have to pull local tile variable declarations inside the tile loop + +// Issue 1676 +// use collapse(2) to collapse the 2 loops to one large loop, so there is better scaling +#pragma omp for schedule(dynamic) collapse(2) nowait for (top=winy-16; top < winy+height; top += TS-32) for (left=winx-16; left < winx+width; left += TS-32) { //location of tile bottom edge @@ -224,7 +231,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { //tile height (=TS except for bottom edge of image) int cc1 = right - left; - //tile vars + //tile vars //counters for pixel location in the image int row, col; //min and max row/column in the tile @@ -286,13 +293,13 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { float rbvarp, rbvarm; - + // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // rgb from input CFA data - // rgb values should be floating point number between 0 and 1 - // after white balance multipliers are applied + // rgb values should be floating point number between 0 and 1 + // after white balance multipliers are applied // a 16 pixel border is added to each side of the image // bookkeeping for borders @@ -300,7 +307,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { if (left(winy+height)) {rrmax=winy+height-top;} else {rrmax=rr1;} if (right>(winx+width)) {ccmax=winx+width-left;} else {ccmax=cc1;} - + for (rr=rrmin; rr < rrmax; rr++) for (row=rr+top, cc=ccmin; cc < ccmax; cc++) { col = cc+left; @@ -315,7 +322,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders if (rrmin>0) { - for (rr=0; rr<16; rr++) + for (rr=0; rr<16; rr++) for (cc=ccmin; cc0) { - for (rr=rrmin; rr0 && ccmin>0) { - for (rr=0; rr<16; rr++) + for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc++) { c=FC(rr,cc); rgb[(rr)*TS+cc][c] = (rawData[winy+32-rr][winx+32-cc])/65535.0f; @@ -360,7 +367,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { } } if (rrmax0 && ccmax0) { - for (rr=0; rr<16; rr++) + for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc++) { c=FC(rr,cc); rgb[(rrmax+rr)*TS+cc][c] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f; @@ -392,31 +399,31 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { for (rr=1; rr < rr1-1; rr++) for (cc=1, indx=(rr)*TS+cc; cc < cc1-1; cc++, indx++) { - + delh[indx] = fabs(cfa[indx+1]-cfa[indx-1]); delv[indx] = fabs(cfa[indx+v1]-cfa[indx-v1]); delhsq[indx] = SQR(delh[indx]); delvsq[indx] = SQR(delv[indx]); delp[indx] = fabs(cfa[indx+p1]-cfa[indx-p1]); delm[indx] = fabs(cfa[indx+m1]-cfa[indx-m1]); - + } for (rr=2; rr < rr1-2; rr++) for (cc=2,indx=(rr)*TS+cc; cc < cc1-2; cc++, indx++) { - + dirwts[indx][0] = eps+delv[indx+v1]+delv[indx-v1]+delv[indx];//+fabs(cfa[indx+v2]-cfa[indx-v2]); //vert directional averaging weights dirwts[indx][1] = eps+delh[indx+1]+delh[indx-1]+delh[indx];//+fabs(cfa[indx+2]-cfa[indx-2]); //horizontal weights - + if (FC(rr,cc)&1) { //for later use in diagonal interpolation //Dgrbp1[indx]=2*cfa[indx]-(cfa[indx-p1]+cfa[indx+p1]); //Dgrbm1[indx]=2*cfa[indx]-(cfa[indx-m1]+cfa[indx+m1]); Dgrbpsq1[indx]=(SQR(cfa[indx]-cfa[indx-p1])+SQR(cfa[indx]-cfa[indx+p1])); Dgrbmsq1[indx]=(SQR(cfa[indx]-cfa[indx-m1])+SQR(cfa[indx]-cfa[indx+m1])); - } + } } //t2_init += clock()-t1_init; @@ -452,7 +459,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { if (fabs(1.0f-crd) 0.8*clip_pt || Gintvha > 0.8*clip_pt || Ginthha > 0.8*clip_pt) { //use HA if highlights are (nearly) clipped guar=guha; gdar=gdha; glar=glha; grar=grha; @@ -476,7 +483,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { //differences of interpolations in opposite directions dgintv[indx]=min(SQR(guha-gdha),SQR(guar-gdar)); dginth[indx]=min(SQR(glha-grha),SQR(glar-grar)); - + } //t2_vcdhcd += clock() - t1_vcdhcd; @@ -493,7 +500,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { //choose the smallest variance; this yields a smoother interpolation if (hcdaltvar clip_pt) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for RT implementation if (Gintv > clip_pt) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]; //if (Ginth > pre_mul[c]) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for dcraw implementation @@ -548,7 +555,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { //if (Ginth > pre_mul[c]) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for dcraw implementation //if (Gintv > pre_mul[c]) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]; } - + cddiffsq[indx] = SQR(vcd[indx]-hcd[indx]); } @@ -556,20 +563,20 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc0 && fabs(0.5-diffwt)0.5) Dgrb[indx][0]=vcd[indx]; @@ -722,7 +729,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc