diff --git a/rtengine/fast_demo.cc b/rtengine/fast_demo.cc index 2b1abe526..c6d8722c8 100644 --- a/rtengine/fast_demo.cc +++ b/rtengine/fast_demo.cc @@ -26,17 +26,15 @@ #include "rawimagesource.h" #include "../rtgui/multilangmgr.h" #include "procparams.h" - -/*#ifdef _OPENMP -#include -#endif*/ +#include "opthelper.h" using namespace std; using namespace rtengine; +#define TS 224 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - +/* LUTf RawImageSource::initInvGrad() { LUTf invGrad (0x10000); @@ -47,46 +45,74 @@ LUTf RawImageSource::initInvGrad() return invGrad; } +*/ +#define INVGRAD(i) (16.0f/SQR(4.0f+i)) +#ifdef __SSE2__ +#define INVGRADV(i) (c16v*_mm_rcp_ps(SQRV(fourv+i))) +#endif +//LUTf RawImageSource::invGrad = RawImageSource::initInvGrad(); -LUTf RawImageSource::invGrad = RawImageSource::initInvGrad(); +SSEFUNCTION void RawImageSource::fast_demosaic(int winx, int winy, int winw, int winh) { + + double progress = 0.0; + const bool plistenerActive = plistener; -void RawImageSource::fast_demosaic(int winx, int winy, int winw, int winh) { //int winx=0, winy=0; //int winw=W, winh=H; if (plistener) { plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::methodstring[RAWParams::fast])); - plistener->setProgress (0.0); + plistener->setProgress (progress); } - float progress = 0.0; - const int bord=4; + const int bord=5; - int clip_pt = 4*65535*initialGain; + float clip_pt = 4*65535*initialGain; //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #ifdef _OPENMP #pragma omp parallel #endif { + + char (*buffer); + float (*greentile); + float (*redtile); + float (*bluetile); +#define CLF 1 + // assign working space + buffer = (char *) malloc(3*sizeof(float)*TS*TS + 3*CLF*64 + 63); + char *data; + data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64); + + greentile = (float (*)) data; //pointers to array + redtile = (float (*)) ((char*)greentile + sizeof(float)*TS*TS + CLF*64); + bluetile = (float (*)) ((char*)redtile + sizeof(float)*TS*TS + CLF*64); + #ifdef _OPENMP -#pragma omp for +#pragma omp sections #endif +{ +#ifdef _OPENMP +#pragma omp section +#endif +{ + //first, interpolate borders using bilinear for (int i=0; i -1) && (i1 < H) && (j1 > -1)) { - int c = FC(i1,j1); - sum[c] += rawData[i1][j1]; - sum[c+3]++; - } + int jmin = max(0,j-1); + for (int i1=imin; i1 -1) && (i1 < H ) && (j1 < W)) { - int c = FC(i1,j1); - sum[c] += rawData[i1][j1]; - sum[c+3]++; - } + int jmax = min(j+2,W); + for (int i1=imin; i1 -1) && (j1 < W) && (i1 > -1)) { - int c = FC(i1,j1); - sum[c] += rawData[i1][j1]; - sum[c+3]++; - } + int c = FC(i1,j1); + sum[c] += rawData[i1][j1]; + sum[c+3]++; } int c=FC(i,j); if (c==1) { @@ -169,13 +195,11 @@ void RawImageSource::fast_demosaic(int winx, int winy, int winw, int winh) { for (int i=H-bord; i -1) && (j1 < W) && (i1 < H)) { - int c = FC(i1,j1); - sum[c] += rawData[i1][j1]; - sum[c+3]++; - } + int c = FC(i1,j1); + sum[c] += rawData[i1][j1]; + sum[c+3]++; } int c=FC(i,j); if (c==1) { @@ -194,91 +218,219 @@ void RawImageSource::fast_demosaic(int winx, int winy, int winw, int winh) { } }//i }//j - - if(plistener) plistener->setProgress(0.05); - + +} +} +#ifdef _OPENMP +#pragma omp single +#endif +{ + if(plistenerActive) { + progress += 0.1; + plistener->setProgress(progress); + } +} //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + int progressCounter = 0; + const double progressInc = 16.0*(1.0-progress)/((H*W)/((TS-4)*(TS-4))); #ifdef _OPENMP -#pragma omp for +#pragma omp for nowait +#endif + for (int top=bord-2; top < H-bord+2; top += TS-(4)) + for (int left=bord-2; left < W-bord+2; left += TS-(4)) { + int bottom = min(top+TS, H-bord+2); + int right = min(left+TS, W-bord+2); + +#ifdef __SSE2__ + int j,cc; + __m128 wtuv, wtdv, wtlv, wtrv; + __m128 greenv,tempv,absv,abs2v; + __m128 onev = _mm_set1_ps( 1.0f ); + __m128 c16v = _mm_set1_ps( 16.0f ); + __m128 fourv = _mm_set1_ps( 4.0f ); + vmask selmask; + vmask andmask = _mm_set_epi32( 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff ); + if(FC(top,left) == 1) + selmask = _mm_set_epi32( 0, 0xffffffff, 0, 0xffffffff ); + else + selmask = _mm_set_epi32( 0xffffffff, 0, 0xffffffff, 0 ); #endif // interpolate G using gradient weights - for (int i=bord; i< H-bord; i++) { + for (int i=top,rr=0; i< bottom; i++,rr++) { float wtu, wtd, wtl, wtr; - for (int j=bord; j < W-bord; j++) { +#ifdef __SSE2__ + selmask = (vmask)_mm_andnot_ps( (__m128)selmask, (__m128)andmask); + + for (j=left,cc=0; j < right-3; j+=4,cc+=4) { + tempv = LVFU(rawData[i][j]); + absv = vabsf(LVFU(rawData[i-1][j])-LVFU(rawData[i+1][j])); + wtuv = INVGRADV(absv+vabsf(tempv-LVFU(rawData[i-2][j]))+vabsf(LVFU(rawData[i-1][j])-LVFU(rawData[i-3][j]))); + wtdv = INVGRADV(absv+vabsf(tempv-LVFU(rawData[i+2][j]))+vabsf(LVFU(rawData[i+1][j])-LVFU(rawData[i+3][j]))); + abs2v = vabsf(LVFU(rawData[i][j-1])-LVFU(rawData[i][j+1])); + wtlv = INVGRADV(abs2v+vabsf(tempv-LVFU(rawData[i][j-2]))+vabsf(LVFU(rawData[i][j-1])-LVFU(rawData[i][j-3]))); + wtrv = INVGRADV(abs2v+vabsf(tempv-LVFU(rawData[i][j+2]))+vabsf(LVFU(rawData[i][j+1])-LVFU(rawData[i][j+3]))); + greenv = (wtuv*LVFU(rawData[i-1][j])+wtdv*LVFU(rawData[i+1][j])+wtlv*LVFU(rawData[i][j-1])+wtrv*LVFU(rawData[i][j+1])) / (wtuv+wtdv+wtlv+wtrv); + _mm_store_ps(&greentile[rr*TS+cc],vself(selmask, greenv, tempv)); + _mm_store_ps(&redtile[rr*TS+cc],tempv); + _mm_store_ps(&bluetile[rr*TS+cc],tempv); + } + for (; j < right; j++,cc++) { if (FC(i,j)==1) { - green[i][j] = rawData[i][j]; - //red[i][j] = green[i][j]; - //blue[i][j] = green[i][j]; + greentile[rr*TS+cc] = rawData[i][j]; - } else { + } else { //compute directional weights using image gradients - wtu=invGrad[(abs(rawData[i+1][j]-rawData[i-1][j])+abs(rawData[i][j]-rawData[i-2][j])+abs(rawData[i-1][j]-rawData[i-3][j])) /4]; - wtd=invGrad[(abs(rawData[i-1][j]-rawData[i+1][j])+abs(rawData[i][j]-rawData[i+2][j])+abs(rawData[i+1][j]-rawData[i+3][j])) /4]; - wtl=invGrad[(abs(rawData[i][j+1]-rawData[i][j-1])+abs(rawData[i][j]-rawData[i][j-2])+abs(rawData[i][j-1]-rawData[i][j-3])) /4]; - wtr=invGrad[(abs(rawData[i][j-1]-rawData[i][j+1])+abs(rawData[i][j]-rawData[i][j+2])+abs(rawData[i][j+1]-rawData[i][j+3])) /4]; + wtu=INVGRAD((abs(rawData[i+1][j]-rawData[i-1][j])+abs(rawData[i][j]-rawData[i-2][j])+abs(rawData[i-1][j]-rawData[i-3][j]))); + wtd=INVGRAD((abs(rawData[i-1][j]-rawData[i+1][j])+abs(rawData[i][j]-rawData[i+2][j])+abs(rawData[i+1][j]-rawData[i+3][j]))); + wtl=INVGRAD((abs(rawData[i][j+1]-rawData[i][j-1])+abs(rawData[i][j]-rawData[i][j-2])+abs(rawData[i][j-1]-rawData[i][j-3]))); + wtr=INVGRAD((abs(rawData[i][j-1]-rawData[i][j+1])+abs(rawData[i][j]-rawData[i][j+2])+abs(rawData[i][j+1]-rawData[i][j+3]))); //store in rgb array the interpolated G value at R/B grid points using directional weighted average - green[i][j]=(wtu*rawData[i-1][j]+wtd*rawData[i+1][j]+wtl*rawData[i][j-1]+wtr*rawData[i][j+1]) / (wtu+wtd+wtl+wtr); - //red[i][j] = green[i][j]; - //blue[i][j] = green[i][j]; - + greentile[rr*TS+cc]=(wtu*rawData[i-1][j]+wtd*rawData[i+1][j]+wtl*rawData[i][j-1]+wtr*rawData[i][j+1]) / (wtu+wtd+wtl+wtr); } + redtile[rr*TS+cc] = rawData[i][j]; + bluetile[rr*TS+cc] = rawData[i][j]; } - //progress+=(double)0.33/(H); - //if(plistener) plistener->setProgress(progress); - } - if(plistener) plistener->setProgress(0.4); + +#else + for (int j=left,cc=0; j < right; j++,cc++) { + if (FC(i,j)==1) { + greentile[rr*TS+cc] = rawData[i][j]; + } else { + //compute directional weights using image gradients + wtu=INVGRAD((abs(rawData[i+1][j]-rawData[i-1][j])+abs(rawData[i][j]-rawData[i-2][j])+abs(rawData[i-1][j]-rawData[i-3][j]))); + wtd=INVGRAD((abs(rawData[i-1][j]-rawData[i+1][j])+abs(rawData[i][j]-rawData[i+2][j])+abs(rawData[i+1][j]-rawData[i+3][j]))); + wtl=INVGRAD((abs(rawData[i][j+1]-rawData[i][j-1])+abs(rawData[i][j]-rawData[i][j-2])+abs(rawData[i][j-1]-rawData[i][j-3]))); + wtr=INVGRAD((abs(rawData[i][j-1]-rawData[i][j+1])+abs(rawData[i][j]-rawData[i][j+2])+abs(rawData[i][j+1]-rawData[i][j+3]))); -#ifdef _OPENMP -#pragma omp for -#endif - for (int i=bord; i< H-bord; i++) { - for (int j=bord+(FC(i,2)&1); j < W-bord; j+=2) { - - int c=FC(i,j); - //interpolate B/R colors at R/B sites - - if (c==0) {//R site - red[i][j] = rawData[i][j]; - blue[i][j] = green[i][j] - 0.25f*((green[i-1][j-1]+green[i-1][j+1]+green[i+1][j+1]+green[i+1][j-1]) - - min(static_cast(clip_pt),rawData[i-1][j-1]+rawData[i-1][j+1]+rawData[i+1][j+1]+rawData[i+1][j-1])); - } else {//B site - red[i][j] = green[i][j] - 0.25f*((green[i-1][j-1]+green[i-1][j+1]+green[i+1][j+1]+green[i+1][j-1]) - - min(static_cast(clip_pt),rawData[i-1][j-1]+rawData[i-1][j+1]+rawData[i+1][j+1]+rawData[i+1][j-1])); - blue[i][j] = rawData[i][j]; + //store in rgb array the interpolated G value at R/B grid points using directional weighted average + greentile[rr*TS+cc]=(wtu*rawData[i-1][j]+wtd*rawData[i+1][j]+wtl*rawData[i][j-1]+wtr*rawData[i][j+1]) / (wtu+wtd+wtl+wtr); } + redtile[rr*TS+cc] = rawData[i][j]; + bluetile[rr*TS+cc] = rawData[i][j]; } - //progress+=(double)0.33/(H); - //if(plistener) plistener->setProgress(progress); - } - if(plistener) plistener->setProgress(0.7); - -#ifdef _OPENMP -#pragma omp barrier #endif + } -#ifdef _OPENMP -#pragma omp for +#ifdef __SSE2__ + __m128 zd25v = _mm_set1_ps(0.25f); + __m128 clip_ptv = _mm_set1_ps( clip_pt ); +#endif + for (int i=top+1,rr=1; i< bottom-1; i++,rr++) { + if (FC(i,left+(FC(i,2)&1)+1)==0) +#ifdef __SSE2__ + for (int j=left+1,cc=1; j < right-1; j+=4,cc+=4) { + //interpolate B/R colors at R/B sites + _mm_storeu_ps(&bluetile[rr*TS+cc], LVFU(greentile[rr*TS+cc]) - zd25v*((LVFU(greentile[(rr-1)*TS+(cc-1)])+LVFU(greentile[(rr-1)*TS+(cc+1)])+LVFU(greentile[(rr+1)*TS+cc+1])+LVFU(greentile[(rr+1)*TS+cc-1])) - + _mm_min_ps(clip_ptv,LVFU(rawData[i-1][j-1])+LVFU(rawData[i-1][j+1])+LVFU(rawData[i+1][j+1])+LVFU(rawData[i+1][j-1])))); + } +#else + for (int cc=(FC(i,2)&1)+1, j=left+cc; j < right-1; j+=2, cc+=2) { + //interpolate B/R colors at R/B sites + bluetile[rr*TS+cc] = greentile[rr*TS+cc] - 0.25f*((greentile[(rr-1)*TS+(cc-1)]+greentile[(rr-1)*TS+(cc+1)]+greentile[(rr+1)*TS+cc+1]+greentile[(rr+1)*TS+cc-1]) - + min(clip_pt,rawData[i-1][j-1]+rawData[i-1][j+1]+rawData[i+1][j+1]+rawData[i+1][j-1])); + } +#endif + else +#ifdef __SSE2__ + for (int j=left+1,cc=1; j < right-1; j+=4,cc+=4) { + //interpolate B/R colors at R/B sites + _mm_storeu_ps(&redtile[rr*TS+cc], LVFU(greentile[rr*TS+cc]) - zd25v*((LVFU(greentile[(rr-1)*TS+cc-1])+LVFU(greentile[(rr-1)*TS+cc+1])+LVFU(greentile[(rr+1)*TS+cc+1])+LVFU(greentile[(rr+1)*TS+cc-1])) - + _mm_min_ps(clip_ptv,LVFU(rawData[i-1][j-1])+LVFU(rawData[i-1][j+1])+LVFU(rawData[i+1][j+1])+LVFU(rawData[i+1][j-1])))); + } +#else + for (int cc=(FC(i,2)&1)+1, j=left+cc; j < right-1; j+=2,cc+=2) { + //interpolate B/R colors at R/B sites + redtile[rr*TS+cc] = greentile[rr*TS+cc] - 0.25f*((greentile[(rr-1)*TS+cc-1]+greentile[(rr-1)*TS+cc+1]+greentile[(rr+1)*TS+cc+1]+greentile[(rr+1)*TS+cc-1]) - + min(clip_pt,rawData[i-1][j-1]+rawData[i-1][j+1]+rawData[i+1][j+1]+rawData[i+1][j-1])); + } +#endif + } + + +#ifdef __SSE2__ + __m128 temp1v,temp2v,greensumv; + selmask = _mm_set_epi32( 0xffffffff, 0, 0xffffffff, 0 ); #endif // interpolate R/B using color differences - for (int i=bord; i< H-bord; i++) { - for (int j=bord+1-(FC(i,2)&1); j < W-bord; j+=2) { - - //interpolate R and B colors at G sites - red[i][j] = green[i][j] - 0.25f*((green[i-1][j]-red[i-1][j])+(green[i+1][j]-red[i+1][j])+ - (green[i][j-1]-red[i][j-1])+(green[i][j+1]-red[i][j+1])); - blue[i][j] = green[i][j] - 0.25f*((green[i-1][j]-blue[i-1][j])+(green[i+1][j]-blue[i+1][j])+ - (green[i][j-1]-blue[i][j-1])+(green[i][j+1]-blue[i][j+1])); + for (int i=top+2, rr=2; i< bottom-2; i++,rr++) { +#ifdef __SSE2__ + for (int cc=2+(FC(i,2)&1), j=left+cc; j < right-2; j+=4,cc+=4) { + // no need to take care about the borders of the tile. There's enough free space. + //interpolate R and B colors at G sites + greenv = LVFU(greentile[rr*TS+cc]); + greensumv = LVFU(greentile[(rr-1)*TS+cc]) + LVFU(greentile[(rr+1)*TS+cc]) + LVFU(greentile[rr*TS+cc-1]) + LVFU(greentile[rr*TS+cc+1]); + + temp1v = LVFU(redtile[rr*TS+cc]); + temp2v = greenv - zd25v*(greensumv - LVFU(redtile[(rr-1)*TS+cc]) - LVFU(redtile[(rr+1)*TS+cc]) - LVFU(redtile[rr*TS+cc-1]) - LVFU(redtile[rr*TS+cc+1])); + +// temp2v = greenv - zd25v*((LVFU(greentile[(rr-1)*TS+cc])-LVFU(redtile[(rr-1)*TS+cc]))+(LVFU(greentile[(rr+1)*TS+cc])-LVFU(redtile[(rr+1)*TS+cc]))+ +// (LVFU(greentile[rr*TS+cc-1])-LVFU(redtile[rr*TS+cc-1]))+(LVFU(greentile[rr*TS+cc+1])-LVFU(redtile[rr*TS+cc+1]))); + _mm_storeu_ps( &redtile[rr*TS+cc], vself(selmask, temp1v, temp2v)); + + temp1v = LVFU(bluetile[rr*TS+cc]); + + temp2v = greenv - zd25v*(greensumv - LVFU(bluetile[(rr-1)*TS+cc]) - LVFU(bluetile[(rr+1)*TS+cc]) - LVFU(bluetile[rr*TS+cc-1]) - LVFU(bluetile[rr*TS+cc+1])); + +// temp2v = greenv - zd25v*((LVFU(greentile[(rr-1)*TS+cc])-LVFU(bluetile[(rr-1)*TS+cc]))+(LVFU(greentile[(rr+1)*TS+cc])-LVFU(bluetile[(rr+1)*TS+cc]))+ +// (LVFU(greentile[rr*TS+cc-1])-LVFU(bluetile[rr*TS+cc-1]))+(LVFU(greentile[rr*TS+cc+1])-LVFU(bluetile[rr*TS+cc+1]))); + _mm_storeu_ps( &bluetile[rr*TS+cc], vself(selmask, temp1v, temp2v)); + } +#else + for (int cc=2+(FC(i,2)&1), j=left+cc; j < right-2; j+=2,cc+=2) { + //interpolate R and B colors at G sites + redtile[rr*TS+cc] = greentile[rr*TS+cc] - 0.25f*((greentile[(rr-1)*TS+cc]-redtile[(rr-1)*TS+cc])+(greentile[(rr+1)*TS+cc]-redtile[(rr+1)*TS+cc])+ + (greentile[rr*TS+cc-1]-redtile[rr*TS+cc-1])+(greentile[rr*TS+cc+1]-redtile[rr*TS+cc+1])); + bluetile[rr*TS+cc] = greentile[rr*TS+cc] - 0.25f*((greentile[(rr-1)*TS+cc]-bluetile[(rr-1)*TS+cc])+(greentile[(rr+1)*TS+cc]-bluetile[(rr+1)*TS+cc])+ + (greentile[rr*TS+cc-1]-bluetile[rr*TS+cc-1])+(greentile[rr*TS+cc+1]-bluetile[rr*TS+cc+1])); + } +#endif } - progress+=(double)0.33/(H); - //if(plistener) plistener->setProgress(progress); - } - if(plistener) plistener->setProgress(0.99); - } // End of parallelization - -#undef bord - + + + for (int i=top+2, rr=2; i< bottom-2; i++,rr++) { +#ifdef __SSE2__ + for (j=left+2, cc=2; j< right-5; j+=4,cc+=4) { + _mm_storeu_ps(&red[i][j], LVFU(redtile[rr*TS+cc])); + _mm_storeu_ps(&green[i][j], LVFU(greentile[rr*TS+cc])); + _mm_storeu_ps(&blue[i][j], LVFU(bluetile[rr*TS+cc])); + } + for (; j< right-2; j++,cc++) { + red[i][j] = redtile[rr*TS+cc]; + green[i][j] = greentile[rr*TS+cc]; + blue[i][j] = bluetile[rr*TS+cc]; + } +#else + for (int j=left+2, cc=2; j< right-2; j++,cc++) { + red[i][j] = redtile[rr*TS+cc]; + green[i][j] = greentile[rr*TS+cc]; + blue[i][j] = bluetile[rr*TS+cc]; + } +#endif + + + } + if(plistenerActive && ((++progressCounter) % 16 == 0)) { +#ifdef _OPENMP +#pragma omp critical (updateprogress) +#endif +{ + progress += progressInc; + progress = min(1.0,progress); + plistener->setProgress (progress); } + } + + } + free(buffer); +} // End of parallelization + if(plistenerActive) plistener->setProgress(1.00); + + + +} +#undef TS +#undef CLF