//////////////////////////////////////////////////////////////// // // AMaZE demosaic algorithm // (Aliasing Minimization and Zipper Elimination) // // copyright (c) 2008-2010 Emil Martinec // // incorporating ideas of Luis Sanz Rodrigues and Paul Lee // // code dated: May 27, 2010 // // amaze_interpolate_RT.cc is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // //////////////////////////////////////////////////////////////// #include "rtengine.h" #include "rawimagesource.h" #include "rt_math.h" #include "../rtgui/multilangmgr.h" #include "procparams.h" #include "sleef.c" #include "opthelper.h" namespace rtengine { SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { #define HCLIP(x) x //is this still necessary??? //min(clip_pt,x) int width=winw, height=winh; const float clip_pt = 1/initialGain; const float clip_pt8 = 0.8f/initialGain; #define TS 160 // Tile size; the image is processed in square tiles to lower memory requirements and facilitate multi-threading #define TSH 80 // half of Tile size // local variables //offset of R pixel within a Bayer quartet int ex, ey; //shifts of pointer value to access pixels in vertical and diagonal directions static const int v1=TS, v2=2*TS, v3=3*TS, p1=-TS+1, p2=-2*TS+2, p3=-3*TS+3, m1=TS+1, m2=2*TS+2, m3=3*TS+3; //tolerance to avoid dividing by zero static const float eps=1e-5, epssq=1e-10; //tolerance to avoid dividing by zero //adaptive ratios threshold static const float arthresh=0.75; //nyquist texture test threshold static const float nyqthresh=0.5; //gaussian on 5x5 quincunx, sigma=1.2 static const float gaussodd[4] = {0.14659727707323927f, 0.103592713382435f, 0.0732036125103057f, 0.0365543548389495f}; //gaussian on 5x5, sigma=1.2 static const float gaussgrad[6] = {0.07384411893421103f, 0.06207511968171489f, 0.0521818194747806f, 0.03687419286733595f, 0.03099732204057846f, 0.018413194161458882f}; //gaussian on 5x5 alt quincunx, sigma=1.5 static const float gausseven[2] = {0.13719494435797422f, 0.05640252782101291f}; //guassian on quincunx grid static const float gquinc[4] = {0.169917f, 0.108947f, 0.069855f, 0.0287182f}; volatile double progress = 0.0; // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Issue 1676 // Moved from inside the parallel section if (plistener) { plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::methodstring[RAWParams::amaze])); plistener->setProgress (0.0); } struct s_hv { float h; float v; }; #pragma omp parallel { int progresscounter=0; //position of top/left corner of the tile int top, left; // beginning of storage block for tile char *buffer; // green values float (*rgbgreen); // sum of square of horizontal gradient and square of vertical gradient float (*delhvsqsum); // gradient based directional weights for interpolation float (*dirwts0); float (*dirwts1); // vertically interpolated color differences G-R, G-B float (*vcd); // horizontally interpolated color differences float (*hcd); // alternative vertical interpolation float (*vcdalt); // alternative horizontal interpolation float (*hcdalt); // square of average color difference float (*cddiffsq); // weight to give horizontal vs vertical interpolation float (*hvwt); // final interpolated color difference float (*Dgrb)[TS*TSH]; // float (*Dgrb)[2]; // gradient in plus (NE/SW) direction float (*delp); // gradient in minus (NW/SE) direction float (*delm); // diagonal interpolation of R+B float (*rbint); // horizontal and vertical curvature of interpolated G (used to refine interpolation in Nyquist texture regions) s_hv (*Dgrb2); // difference between up/down interpolations of G float (*dgintv); // difference between left/right interpolations of G float (*dginth); // diagonal (plus) color difference R-B or G1-G2 // float (*Dgrbp1); // diagonal (minus) color difference R-B or G1-G2 // float (*Dgrbm1); float (*Dgrbsq1m); float (*Dgrbsq1p); // s_mp (*Dgrbsq1); // square of diagonal color difference // float (*Dgrbpsq1); // square of diagonal color difference // float (*Dgrbmsq1); // tile raw data float (*cfa); // relative weight for combining plus and minus diagonal interpolations float (*pmwt); // interpolated color difference R-B in minus and plus direction float (*rbm); float (*rbp); // nyquist texture flag 1=nyquist, 0=not nyquist char (*nyquist); #define CLF 1 // assign working space buffer = (char *) calloc(22*sizeof(float)*TS*TS + sizeof(char)*TS*TSH+23*CLF*64 + 63, 1); char *data; data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64); //merror(buffer,"amaze_interpolate()"); rgbgreen = (float (*)) data; //pointers to array delhvsqsum = (float (*)) ((char*)rgbgreen + sizeof(float)*TS*TS + CLF*64); dirwts0 = (float (*)) ((char*)delhvsqsum + sizeof(float)*TS*TS + CLF*64); dirwts1 = (float (*)) ((char*)dirwts0 + sizeof(float)*TS*TS + CLF*64); vcd = (float (*)) ((char*)dirwts1 + sizeof(float)*TS*TS + CLF*64); hcd = (float (*)) ((char*)vcd + sizeof(float)*TS*TS + CLF*64); vcdalt = (float (*)) ((char*)hcd + sizeof(float)*TS*TS + CLF*64); hcdalt = (float (*)) ((char*)vcdalt + sizeof(float)*TS*TS + CLF*64); cddiffsq = (float (*)) ((char*)hcdalt + sizeof(float)*TS*TS + CLF*64); hvwt = (float (*)) ((char*)cddiffsq + sizeof(float)*TS*TS + CLF*64); Dgrb = (float (*)[TS*TSH]) ((char*)hvwt + sizeof(float)*TS*TSH + CLF*64); delp = (float (*)) ((char*)Dgrb + sizeof(float)*TS*TS + CLF*64); delm = (float (*)) ((char*)delp + sizeof(float)*TS*TSH + CLF*64); rbint = (float (*)) ((char*)delm + sizeof(float)*TS*TSH + CLF*64); Dgrb2 = (s_hv (*)) ((char*)rbint + sizeof(float)*TS*TSH + CLF*64); dgintv = (float (*)) ((char*)Dgrb2 + sizeof(float)*TS*TS + CLF*64); dginth = (float (*)) ((char*)dgintv + sizeof(float)*TS*TS + CLF*64); Dgrbsq1m = (float (*)) ((char*)dginth + sizeof(float)*TS*TS + CLF*64); Dgrbsq1p = (float (*)) ((char*)Dgrbsq1m + sizeof(float)*TS*TSH + CLF*64); cfa = (float (*)) ((char*)Dgrbsq1p + sizeof(float)*TS*TSH + CLF*64); pmwt = (float (*)) ((char*)cfa + sizeof(float)*TS*TS + CLF*64); rbm = (float (*)) ((char*)pmwt + sizeof(float)*TS*TSH + CLF*64); rbp = (float (*)) ((char*)rbm + sizeof(float)*TS*TSH + CLF*64); nyquist = (char (*)) ((char*)rbp + sizeof(float)*TS*TSH + CLF*64); #undef CLF // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //determine GRBG coset; (ey,ex) is the offset of the R subarray if (FC(0,0)==1) {//first pixel is G if (FC(0,1)==0) {ey=0; ex=1;} else {ey=1; ex=0;} } else {//first pixel is R or B if (FC(0,0)==0) {ey=0; ex=0;} else {ey=1; ex=1;} } // Main algorithm: Tile loop //#pragma omp parallel for shared(rawData,height,width,red,green,blue) private(top,left) schedule(dynamic) //code is openmp ready; just have to pull local tile variable declarations inside the tile loop // Issue 1676 // use collapse(2) to collapse the 2 loops to one large loop, so there is better scaling #pragma omp for schedule(dynamic) collapse(2) nowait for (top=winy-16; top < winy+height; top += TS-32) for (left=winx-16; left < winx+width; left += TS-32) { memset(nyquist, 0, sizeof(char)*TS*TSH); memset(rbint, 0, sizeof(float)*TS*TSH); //location of tile bottom edge int bottom = min(top+TS,winy+height+16); //location of tile right edge int right = min(left+TS, winx+width+16); //tile width (=TS except for right edge of image) int rr1 = bottom - top; //tile height (=TS except for bottom edge of image) int cc1 = right - left; //tile vars //counters for pixel location in the image int row, col; //min and max row/column in the tile int rrmin, rrmax, ccmin, ccmax; //counters for pixel location within the tile int rr, cc; //color index 0=R, 1=G, 2=B int c; //pointer counters within the tile int indx, indx1; //dummy indices int i, j; //color ratios in up/down/left/right directions float cru, crd, crl, crr; //adaptive weights for vertical/horizontal/plus/minus directions float vwt, hwt, pwt, mwt; //vertical and horizontal G interpolations float Gintv, Ginth; //G interpolated in vert/hor directions using adaptive ratios float guar, gdar, glar, grar; //G interpolated in vert/hor directions using Hamilton-Adams method float guha, gdha, glha, grha; //interpolated G from fusing left/right or up/down float Ginthar, Ginthha, Gintvar, Gintvha; //color difference (G-R or G-B) variance in up/down/left/right directions float Dgrbvvaru, Dgrbvvard, Dgrbhvarl, Dgrbhvarr; float uave, dave, lave, rave; //color difference variances in vertical and horizontal directions float vcdvar, hcdvar, vcdvar1, hcdvar1, hcdaltvar, vcdaltvar; //adaptive interpolation weight using variance of color differences float varwt; // 639 - 644 //adaptive interpolation weight using difference of left-right and up-down G interpolations float diffwt; // 640 - 644 //alternative adaptive weight for combining horizontal/vertical interpolations float hvwtalt; // 745 - 748 //interpolation of G in four directions float gu, gd, gl, gr; //variance of G in vertical/horizontal directions float gvarh, gvarv; //Nyquist texture test float nyqtest; // 658 - 681 //accumulators for Nyquist texture interpolation float sumh, sumv, sumsqh, sumsqv, areawt; //color ratios in diagonal directions float crse, crnw, crne, crsw; //color differences in diagonal directions float rbse, rbnw, rbne, rbsw; //adaptive weights for combining diagonal interpolations float wtse, wtnw, wtsw, wtne; //alternate weight for combining diagonal interpolations float pmwtalt; // 885 - 888 //variance of R-B in plus/minus directions float rbvarm; // 843 - 848 // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // rgb from input CFA data // rgb values should be floating point number between 0 and 1 // after white balance multipliers are applied // a 16 pixel border is added to each side of the image // bookkeeping for borders if (top(winy+height)) {rrmax=winy+height-top;} else {rrmax=rr1;} if (right>(winx+width)) {ccmax=winx+width-left;} else {ccmax=cc1;} #ifdef __SSE2__ const __m128 c65535v = _mm_set1_ps( 65535.0f ); __m128 tempv; for (rr=rrmin; rr < rrmax; rr++){ for (row=rr+top, cc=ccmin; cc < ccmax-3; cc+=4) { indx1=rr*TS+cc; tempv = LVFU(rawData[row][cc+left]) / c65535v; _mm_store_ps( &cfa[indx1], tempv ); _mm_store_ps( &rgbgreen[indx1], tempv ); } for (; cc < ccmax; cc++) { indx1=rr*TS+cc; cfa[indx1] = (rawData[row][cc+left])/65535.0f; if(FC(rr,cc)==1) rgbgreen[indx1] = cfa[indx1]; } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders if (rrmin>0) { for (rr=0; rr<16; rr++) for (cc=ccmin,row = 32-rr+top; cc0) { for (rr=rrmin; rr0 && ccmin>0) { for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc+=4) { indx1 = (rr)*TS+cc; tempv = LVFU(rawData[winy+32-rr][winx+32-cc]) / c65535v; _mm_store_ps( &cfa[indx1], tempv ); _mm_store_ps( &rgbgreen[indx1], tempv ); } } if (rrmax0 && ccmax0) { for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc++) { cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f; if(FC(rr,cc)==1) rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc]; } } #else for (rr=rrmin; rr < rrmax; rr++) for (row=rr+top, cc=ccmin; cc < ccmax; cc++) { indx1=rr*TS+cc; cfa[indx1] = (rawData[row][cc+left])/65535.0f; if(FC(rr,cc)==1) rgbgreen[indx1] = cfa[indx1]; } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders if (rrmin>0) { for (rr=0; rr<16; rr++) for (cc=ccmin,row = 32-rr+top; cc0) { for (rr=rrmin; rr0 && ccmin>0) { for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc++) { cfa[(rr)*TS+cc] = (rawData[winy+32-rr][winx+32-cc])/65535.0f; if(FC(rr,cc)==1) rgbgreen[(rr)*TS+cc] = cfa[(rr)*TS+cc]; } } if (rrmax0 && ccmax0) { for (rr=0; rr<16; rr++) for (cc=0; cc<16; cc++) { cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f; if(FC(rr,cc)==1) rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc]; } } #endif //end of border fill // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #ifdef __SSE2__ __m128 delhv,delvv; const __m128 epsv = _mm_set1_ps( eps ); for (rr=2; rr < rr1-2; rr++) { for (cc=0, indx=(rr)*TS+cc; cc < cc1; cc+=4, indx+=4) { delhv = vabsf( LVFU( cfa[indx+1] ) - LVFU( cfa[indx-1] ) ); delvv = vabsf( LVF( cfa[indx+v1] ) - LVF( cfa[indx-v1] ) ); _mm_store_ps( &dirwts1[indx], epsv + vabsf( LVFU( cfa[indx+2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVFU( cfa[indx-2] )) + delhv ); delhv = delhv * delhv; _mm_store_ps( &dirwts0[indx], epsv + vabsf( LVF( cfa[indx+v2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVF( cfa[indx-v2] )) + delvv ); delvv = delvv * delvv; _mm_store_ps( &delhvsqsum[indx], delhv + delvv); } } #else // horizontal and vedrtical gradient float delh,delv; for (rr=2; rr < rr1-2; rr++) for (cc=2, indx=(rr)*TS+cc; cc < cc1-2; cc++, indx++) { delh = fabsf(cfa[indx+1]-cfa[indx-1]); delv = fabsf(cfa[indx+v1]-cfa[indx-v1]); dirwts0[indx] = eps+fabsf(cfa[indx+v2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-v2])+delv; dirwts1[indx] = eps+fabsf(cfa[indx+2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-2])+delh;//+fabsf(cfa[indx+2]-cfa[indx-2]); delhvsqsum[indx] = SQR(delh) + SQR(delv); } #endif #ifdef __SSE2__ __m128 Dgrbsq1pv, Dgrbsq1mv,temp2v; for (rr=6; rr < rr1-6; rr++){ if((FC(rr,2)&1)==0) { for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) { tempv = LC2VFU(cfa[indx+1]); Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx+1-p1]))+SQRV(tempv-LC2VFU(cfa[indx+1+p1]))); _mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+p1])-LC2VFU(cfa[indx-p1]))); _mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+m1])-LC2VFU(cfa[indx-m1]))); Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx+1-m1]))+SQRV(tempv-LC2VFU(cfa[indx+1+m1]))); _mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv ); _mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv ); } } else { for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) { tempv = LC2VFU(cfa[indx]); Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx-p1]))+SQRV(tempv-LC2VFU(cfa[indx+p1]))); _mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+1+p1])-LC2VFU(cfa[indx+1-p1]))); _mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+1+m1])-LC2VFU(cfa[indx+1-m1]))); Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx-m1]))+SQRV(tempv-LC2VFU(cfa[indx+m1]))); _mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv ); _mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv ); } } } #else for (rr=6; rr < rr1-6; rr++){ if((FC(rr,2)&1)==0) { for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) { delp[indx>>1] = fabsf(cfa[indx+p1]-cfa[indx-p1]); delm[indx>>1] = fabsf(cfa[indx+m1]-cfa[indx-m1]); Dgrbsq1p[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-p1])+SQR(cfa[indx+1]-cfa[indx+1+p1])); Dgrbsq1m[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-m1])+SQR(cfa[indx+1]-cfa[indx+1+m1])); } } else { for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) { Dgrbsq1p[indx>>1]=(SQR(cfa[indx]-cfa[indx-p1])+SQR(cfa[indx]-cfa[indx+p1])); Dgrbsq1m[indx>>1]=(SQR(cfa[indx]-cfa[indx-m1])+SQR(cfa[indx]-cfa[indx+m1])); delp[indx>>1] = fabsf(cfa[indx+1+p1]-cfa[indx+1-p1]); delm[indx>>1] = fabsf(cfa[indx+1+m1]-cfa[indx+1-m1]); } } } #endif // end of tile initialization // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //interpolate vertical and horizontal color differences #ifdef __SSE2__ __m128 sgnv,cruv,crdv,crlv,crrv,guhav,gdhav,glhav,grhav,hwtv,vwtv,Gintvhav,Ginthhav,guarv,gdarv,glarv,grarv; vmask clipmask; if( !(FC(4,4)&1) ) sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f ); else sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f ); __m128 zd5v = _mm_set1_ps( 0.5f ); __m128 onev = _mm_set1_ps( 1.0f ); __m128 arthreshv = _mm_set1_ps( arthresh ); __m128 clip_pt8v = _mm_set1_ps( clip_pt8 ); for (rr=4; rr clip_pt8 || Gintvha > clip_pt8 || Ginthha > clip_pt8) { //use HA if highlights are (nearly) clipped guar=guha; gdar=gdha; glar=glha; grar=grha; vcd[indx]=vcdalt[indx]; hcd[indx]=hcdalt[indx]; } //differences of interpolations in opposite directions dgintv[indx]=min(SQR(guha-gdha),SQR(guar-gdar)); dginth[indx]=min(SQR(glha-grha),SQR(glar-grar)); } } #endif #ifdef __SSE2__ __m128 hcdvarv, vcdvarv; __m128 hcdaltvarv,vcdaltvarv,hcdv,vcdv,hcdaltv,vcdaltv,sgn3v,Ginthv,Gintvv,hcdoldv,vcdoldv; __m128 threev = _mm_set1_ps( 3.0f ); __m128 clip_ptv = _mm_set1_ps( clip_pt ); __m128 nsgnv; vmask hcdmask, vcdmask,tempmask; if( !(FC(4,4)&1) ) sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f ); else sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f ); sgn3v = threev * sgnv; for (rr=4; rr0) { if (3.0f*hcd[indx] > (Ginth+cfa[indx])) { hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx]; } else { hwt = 1.0f -3.0f*hcd[indx]/(eps+Ginth+cfa[indx]); hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx]); } } if (vcd[indx]>0) { if (3.0f*vcd[indx] > (Gintv+cfa[indx])) { vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]; } else { vwt = 1.0f -3.0f*vcd[indx]/(eps+Gintv+cfa[indx]); vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]); } } if (Ginth > clip_pt) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for RT implementation if (Gintv > clip_pt) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]; //if (Ginth > pre_mul[c]) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for dcraw implementation //if (Gintv > pre_mul[c]) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]; } else {//R or B site Ginth = hcd[indx]+cfa[indx];//interpolated G Gintv = vcd[indx]+cfa[indx]; if (hcd[indx]<0) { if (3.0f*hcd[indx] < -(Ginth+cfa[indx])) { hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx]; } else { hwt = 1.0f +3.0f*hcd[indx]/(eps+Ginth+cfa[indx]); hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx]); } } if (vcd[indx]<0) { if (3.0f*vcd[indx] < -(Gintv+cfa[indx])) { vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]; } else { vwt = 1.0f +3.0f*vcd[indx]/(eps+Gintv+cfa[indx]); vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]); } } if (Ginth > clip_pt) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for RT implementation if (Gintv > clip_pt) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]; //if (Ginth > pre_mul[c]) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for dcraw implementation //if (Gintv > pre_mul[c]) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]; cddiffsq[indx] = SQR(vcd[indx]-hcd[indx]); } c = !c; } } #endif #ifdef __SSE2__ __m128 uavev,davev,lavev,ravev,Dgrbvvaruv,Dgrbvvardv,Dgrbhvarlv,Dgrbhvarrv,varwtv,diffwtv,vcdvar1v,hcdvar1v; __m128 epssqv = _mm_set1_ps( epssq ); vmask decmask; for (rr=6; rr>1], vself( decmask, varwtv, diffwtv)); } } #else for (rr=6; rr0 && fabsf(0.5-diffwt)>1]=varwt;} else {hvwt[indx>>1]=diffwt;} //hvwt[indx]=varwt; } } #endif // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Nyquist test for (rr=6; rr0) nyquist[indx>>1]=1;//nyquist=1 for nyquist region } unsigned int nyquisttemp; for (rr=8; rr>1]+nyquist[(indx-m1)>>1]+nyquist[(indx+p1)>>1]+ nyquist[(indx-2)>>1]+nyquist[indx>>1]+nyquist[(indx+2)>>1]+ nyquist[(indx-p1)>>1]+nyquist[(indx+m1)>>1]+nyquist[(indx+v2)>>1]); //if most of your neighbors are named Nyquist, it's likely that you're one too if (nyquisttemp>4) nyquist[indx>>1]=1; //or not if (nyquisttemp<4) nyquist[indx>>1]=0; } } // end of Nyquist test // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // in areas of Nyquist texture, do area interpolation for (rr=8; rr>1]) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // area interpolation sumh=sumv=sumsqh=sumsqv=areawt=0; for (i=-6; i<7; i+=2) for (j=-6; j<7; j+=2) { indx1=(rr+i)*TS+cc+j; if (nyquist[indx1>>1]) { sumh += cfa[indx1]-xdiv2f(cfa[indx1-1]+cfa[indx1+1]); sumv += cfa[indx1]-xdiv2f(cfa[indx1-v1]+cfa[indx1+v1]); sumsqh += xdiv2f(SQR(cfa[indx1]-cfa[indx1-1])+SQR(cfa[indx1]-cfa[indx1+1])); sumsqv += xdiv2f(SQR(cfa[indx1]-cfa[indx1-v1])+SQR(cfa[indx1]-cfa[indx1+v1])); areawt +=1; } } //horizontal and vertical color differences, and adaptive weight hcdvar=epssq+fabsf(areawt*sumsqh-sumh*sumh); vcdvar=epssq+fabsf(areawt*sumsqv-sumv*sumv); hvwt[indx>>1]=hcdvar/(vcdvar+hcdvar); // end of area interpolation // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //populate G at R/B sites for (rr=8; rr>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1],2); // hvwtalt = 0.25*(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1]); // vo=fabsf(0.5-hvwt[indx>>1]); // ve=fabsf(0.5-hvwtalt); if (fabsf(0.5-hvwt[indx>>1])>1]=hvwtalt;}//a better result was obtained from the neighbors // if (vo>1]=hvwtalt;}//a better result was obtained from the neighbors Dgrb[0][indx>>1] = (hcd[indx]*(1.0f-hvwt[indx>>1]) + vcd[indx]*hvwt[indx>>1]);//evaluate color differences //if (hvwt[indx]<0.5) Dgrb[indx][0]=hcd[indx]; //if (hvwt[indx]>0.5) Dgrb[indx][0]=vcd[indx]; rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1];//evaluate G (finally!) //local curvature in G (preparation for nyquist refinement step) if (nyquist[indx>>1]) { Dgrb2[indx>>1].h = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-1]+rgbgreen[indx+1])); Dgrb2[indx>>1].v = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-v1]+rgbgreen[indx+v1])); } else { Dgrb2[indx>>1].h = Dgrb2[indx>>1].v = 0; } } //end of standard interpolation // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // refine Nyquist areas using G curvatures for (rr=8; rr>1]) { //local averages (over Nyquist pixels only) of G curvature squared gvarh = epssq + (gquinc[0]*Dgrb2[indx>>1].h+ gquinc[1]*(Dgrb2[(indx-m1)>>1].h+Dgrb2[(indx+p1)>>1].h+Dgrb2[(indx-p1)>>1].h+Dgrb2[(indx+m1)>>1].h)+ gquinc[2]*(Dgrb2[(indx-v2)>>1].h+Dgrb2[(indx-2)>>1].h+Dgrb2[(indx+2)>>1].h+Dgrb2[(indx+v2)>>1].h)+ gquinc[3]*(Dgrb2[(indx-m2)>>1].h+Dgrb2[(indx+p2)>>1].h+Dgrb2[(indx-p2)>>1].h+Dgrb2[(indx+m2)>>1].h)); gvarv = epssq + (gquinc[0]*Dgrb2[indx>>1].v+ gquinc[1]*(Dgrb2[(indx-m1)>>1].v+Dgrb2[(indx+p1)>>1].v+Dgrb2[(indx-p1)>>1].v+Dgrb2[(indx+m1)>>1].v)+ gquinc[2]*(Dgrb2[(indx-v2)>>1].v+Dgrb2[(indx-2)>>1].v+Dgrb2[(indx+2)>>1].v+Dgrb2[(indx+v2)>>1].v)+ gquinc[3]*(Dgrb2[(indx-m2)>>1].v+Dgrb2[(indx+p2)>>1].v+Dgrb2[(indx-p2)>>1].v+Dgrb2[(indx+m2)>>1].v)); //use the results as weights for refined G interpolation Dgrb[0][indx>>1] = (hcd[indx]*gvarv + vcd[indx]*gvarh)/(gvarv+gvarh); rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1]; } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // diagonal interpolation correction #ifdef __SSE2__ __m128 rbsev,rbnwv,rbnev,rbswv,cfav,rbmv,rbpv,temp1v,wtv; __m128 wtsev, wtnwv, wtnev, wtswv, rbvarmv; __m128 gausseven0v = _mm_set1_ps(gausseven[0]); __m128 gausseven1v = _mm_set1_ps(gausseven[1]); __m128 twov = _mm_set1_ps(2.0f); #endif for (rr=8; rr>1; cc>1])+LVFU(delm[(indx+m2)>>1]);//same as for wtu,wtd,wtl,wtr wtnwv= temp1v+LVFU(delm[(indx-m1)>>1])+LVFU(delm[(indx-m2)>>1]); rbmv = (wtsev*rbnwv+wtnwv*rbsev)/(wtsev+wtnwv); temp1v = ULIMV(rbmv ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1])); wtv = twov * (cfav-rbmv)/(epsv+rbmv+cfav); temp2v = wtv * rbmv + (onev-wtv)*temp1v; temp2v = vself(vmaskf_lt(rbmv + rbmv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbmv, cfav), temp2v, rbmv); _mm_storeu_ps(&rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1])), temp2v )); temp1v = LC2VFU(cfa[indx+p1]); temp2v = LC2VFU(cfa[indx+p2]); rbnev = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbnev = vself(vmaskf_lt(vabsf(onev - rbnev), arthreshv), cfav * rbnev, temp1v + zd5v * (cfav - temp2v)); temp1v = LC2VFU(cfa[indx-p1]); temp2v = LC2VFU(cfa[indx-p2]); rbswv = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbswv = vself(vmaskf_lt(vabsf(onev - rbswv), arthreshv), cfav * rbswv, temp1v + zd5v * (cfav - temp2v)); temp1v = epsv + LVFU(delp[indx1]); wtnev= temp1v+LVFU(delp[(indx+p1)>>1])+LVFU(delp[(indx+p2)>>1]); wtswv= temp1v+LVFU(delp[(indx-p1)>>1])+LVFU(delp[(indx-p2)>>1]); rbpv = (wtnev*rbswv+wtswv*rbnev)/(wtnev+wtswv); temp1v = ULIMV(rbpv ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1])); wtv = twov * (cfav-rbpv)/(epsv+rbpv+cfav); temp2v = wtv * rbpv + (onev-wtv)*temp1v; temp2v = vself(vmaskf_lt(rbpv + rbpv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbpv, cfav), temp2v, rbpv); _mm_storeu_ps(&rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1])), temp2v )); rbvarmv = epssqv + (gausseven0v*(LVFU(Dgrbsq1m[(indx-v1)>>1])+LVFU(Dgrbsq1m[(indx-1)>>1])+LVFU(Dgrbsq1m[(indx+1)>>1])+LVFU(Dgrbsq1m[(indx+v1)>>1])) + gausseven1v*(LVFU(Dgrbsq1m[(indx-v2-1)>>1])+LVFU(Dgrbsq1m[(indx-v2+1)>>1])+LVFU(Dgrbsq1m[(indx-2-v1)>>1])+LVFU(Dgrbsq1m[(indx+2-v1)>>1])+ LVFU(Dgrbsq1m[(indx-2+v1)>>1])+LVFU(Dgrbsq1m[(indx+2+v1)>>1])+LVFU(Dgrbsq1m[(indx+v2-1)>>1])+LVFU(Dgrbsq1m[(indx+v2+1)>>1]))); _mm_storeu_ps(&pmwt[indx1] , rbvarmv/((epssqv + (gausseven0v*(LVFU(Dgrbsq1p[(indx-v1)>>1])+LVFU(Dgrbsq1p[(indx-1)>>1])+LVFU(Dgrbsq1p[(indx+1)>>1])+LVFU(Dgrbsq1p[(indx+v1)>>1])) + gausseven1v*(LVFU(Dgrbsq1p[(indx-v2-1)>>1])+LVFU(Dgrbsq1p[(indx-v2+1)>>1])+LVFU(Dgrbsq1p[(indx-2-v1)>>1])+LVFU(Dgrbsq1p[(indx+2-v1)>>1])+ LVFU(Dgrbsq1p[(indx-2+v1)>>1])+LVFU(Dgrbsq1p[(indx+2+v1)>>1])+LVFU(Dgrbsq1p[(indx+v2-1)>>1])+LVFU(Dgrbsq1p[(indx+v2+1)>>1]))))+rbvarmv)); } #else for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc>1]+delm[(indx+m2)>>1];//same as for wtu,wtd,wtl,wtr wtnw= eps+delm[indx1]+delm[(indx-m1)>>1]+delm[(indx-m2)>>1]; wtne= eps+delp[indx1]+delp[(indx+p1)>>1]+delp[(indx+p2)>>1]; wtsw= eps+delp[indx1]+delp[(indx-p1)>>1]+delp[(indx-p2)>>1]; rbm[indx1] = (wtse*rbnw+wtnw*rbse)/(wtse+wtnw); rbp[indx1] = (wtne*rbsw+wtsw*rbne)/(wtne+wtsw); /* rbvarp = epssq + (gausseven[0]*(Dgrbsq1[indx-v1].p+Dgrbsq1[indx-1].p+Dgrbsq1[indx+1].p+Dgrbsq1[indx+v1].p) + gausseven[1]*(Dgrbsq1[indx-v2-1].p+Dgrbsq1[indx-v2+1].p+Dgrbsq1[indx-2-v1].p+Dgrbsq1[indx+2-v1].p+ Dgrbsq1[indx-2+v1].p+Dgrbsq1[indx+2+v1].p+Dgrbsq1[indx+v2-1].p+Dgrbsq1[indx+v2+1].p)); */ rbvarm = epssq + (gausseven[0]*(Dgrbsq1m[(indx-v1)>>1]+Dgrbsq1m[(indx-1)>>1]+Dgrbsq1m[(indx+1)>>1]+Dgrbsq1m[(indx+v1)>>1]) + gausseven[1]*(Dgrbsq1m[(indx-v2-1)>>1]+Dgrbsq1m[(indx-v2+1)>>1]+Dgrbsq1m[(indx-2-v1)>>1]+Dgrbsq1m[(indx+2-v1)>>1]+ Dgrbsq1m[(indx-2+v1)>>1]+Dgrbsq1m[(indx+2+v1)>>1]+Dgrbsq1m[(indx+v2-1)>>1]+Dgrbsq1m[(indx+v2+1)>>1])); pmwt[indx1] = rbvarm/((epssq + (gausseven[0]*(Dgrbsq1p[(indx-v1)>>1]+Dgrbsq1p[(indx-1)>>1]+Dgrbsq1p[(indx+1)>>1]+Dgrbsq1p[(indx+v1)>>1]) + gausseven[1]*(Dgrbsq1p[(indx-v2-1)>>1]+Dgrbsq1p[(indx-v2+1)>>1]+Dgrbsq1p[(indx-2-v1)>>1]+Dgrbsq1p[(indx+2-v1)>>1]+ Dgrbsq1p[(indx-2+v1)>>1]+Dgrbsq1p[(indx+2+v1)>>1]+Dgrbsq1p[(indx+v2-1)>>1]+Dgrbsq1p[(indx+v2+1)>>1])))+rbvarm); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //bound the interpolation in regions of high saturation if (rbp[indx1] clip_pt) rbp[indx1]=ULIM(rbp[indx1],cfa[indx-p1],cfa[indx+p1]);//for RT implementation if (rbm[indx1] > clip_pt) rbm[indx1]=ULIM(rbm[indx1],cfa[indx-m1],cfa[indx+m1]); //c=2-FC(rr,cc);//for dcraw implementation //if (rbp[indx] > pre_mul[c]) rbp[indx]=ULIM(rbp[indx],cfa[indx-p1],cfa[indx+p1]); //if (rbm[indx] > pre_mul[c]) rbm[indx]=ULIM(rbm[indx],cfa[indx-m1],cfa[indx+m1]); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //rbint[indx] = 0.5*(cfa[indx] + (rbp*rbvarm+rbm*rbvarp)/(rbvarp+rbvarm));//this is R+B, interpolated } #endif } #ifdef __SSE2__ __m128 pmwtaltv; __m128 zd25v = _mm_set1_ps(0.25f); #endif for (rr=10; rr>1; cc>1])+LVFU(pmwt[(indx+p1)>>1])+LVFU(pmwt[(indx-p1)>>1])+LVFU(pmwt[(indx+m1)>>1])); tempv = LVFU(pmwt[indx1]); tempv = vself(vmaskf_lt(vabsf(zd5v-tempv), vabsf(zd5v-pmwtaltv)), pmwtaltv, tempv); _mm_storeu_ps( &pmwt[indx1], tempv); _mm_storeu_ps( &rbint[indx1], zd5v * (LC2VFU(cfa[indx]) + LVFU(rbm[indx1]) * (onev - tempv) + LVFU(rbp[indx1]) * tempv)); } #else for (cc=10+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc>1]+pmwt[(indx+p1)>>1]+pmwt[(indx-p1)>>1]+pmwt[(indx+m1)>>1],2); if (fabsf(0.5-pmwt[indx1])>1; cc>1])>1]) ) continue; //now interpolate G vertically/horizontally using R+B values //unfortunately, since G interpolation cannot be done diagonally this may lead to color shifts //color ratios for G interpolation cru = cfa[indx-v1]*2.0/(eps+rbint[indx1]+rbint[(indx1-v1)]); crd = cfa[indx+v1]*2.0/(eps+rbint[indx1]+rbint[(indx1+v1)]); crl = cfa[indx-1]*2.0/(eps+rbint[indx1]+rbint[(indx1-1)]); crr = cfa[indx+1]*2.0/(eps+rbint[indx1]+rbint[(indx1+1)]); //interpolated G via adaptive ratios or Hamilton-Adams in each cardinal direction if (fabsf(1.0f-cru) clip_pt) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]);//for RT implementation if (Gintv > clip_pt) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]); //c=FC(rr,cc);//for dcraw implementation //if (Ginth > pre_mul[c]) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]); //if (Gintv > pre_mul[c]) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% rgbgreen[indx] = Ginth*(1.0f-hvwt[indx1]) + Gintv*hvwt[indx1]; //rgb[indx][1] = 0.5*(rgb[indx][1]+0.25*(rgb[indx-v1][1]+rgb[indx+v1][1]+rgb[indx-1][1]+rgb[indx+1][1])); Dgrb[0][indx>>1] = rgbgreen[indx]-cfa[indx]; //rgb[indx][2-FC(rr,cc)]=2*rbint[indx]-cfa[indx]; } //end of diagonal interpolation correction // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fancy chrominance interpolation //(ey,ex) is location of R site for (rr=13-ey; rr>1; cc>1])-LVFU(Dgrb[c][(indx+m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1]))); wtnev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1]))); wtswv=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1]))); wtsev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1]))); //Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse); _mm_storeu_ps(&Dgrb[c][indx>>1], (wtnwv*(oned325v*LVFU(Dgrb[c][(indx-m1)>>1])-zd175v*LVFU(Dgrb[c][(indx-m3)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-v2)>>1]) )+ wtnev*(oned325v*LVFU(Dgrb[c][(indx+p1)>>1])-zd175v*LVFU(Dgrb[c][(indx+p3)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+v2)>>1]) )+ wtswv*(oned325v*LVFU(Dgrb[c][(indx-p1)>>1])-zd175v*LVFU(Dgrb[c][(indx-p3)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-v2)>>1]) )+ wtsev*(oned325v*LVFU(Dgrb[c][(indx+m1)>>1])-zd175v*LVFU(Dgrb[c][(indx+m3)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+v2)>>1]) ))/(wtnwv+wtnev+wtswv+wtsev)); } #else for (cc=14+(FC(rr,2)&1),indx=rr*TS+cc,c=1-FC(rr,cc)/2; cc>1]-Dgrb[c][(indx+m1)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx-m3)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m3)>>1])); wtne=1.0f/(eps+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p1)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx+p3)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p3)>>1])); wtsw=1.0f/(eps+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p1)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+m3)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p3)>>1])); wtse=1.0f/(eps+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m1)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-p3)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx+m3)>>1])); //Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse); Dgrb[c][indx>>1]=(wtnw*(1.325f*Dgrb[c][(indx-m1)>>1]-0.175f*Dgrb[c][(indx-m3)>>1]-0.075f*Dgrb[c][(indx-m1-2)>>1]-0.075f*Dgrb[c][(indx-m1-v2)>>1] )+ wtne*(1.325f*Dgrb[c][(indx+p1)>>1]-0.175f*Dgrb[c][(indx+p3)>>1]-0.075f*Dgrb[c][(indx+p1+2)>>1]-0.075f*Dgrb[c][(indx+p1+v2)>>1] )+ wtsw*(1.325f*Dgrb[c][(indx-p1)>>1]-0.175f*Dgrb[c][(indx-p3)>>1]-0.075f*Dgrb[c][(indx-p1-2)>>1]-0.075f*Dgrb[c][(indx-p1-v2)>>1] )+ wtse*(1.325f*Dgrb[c][(indx+m1)>>1]-0.175f*Dgrb[c][(indx+m3)>>1]-0.075f*Dgrb[c][(indx+m1+2)>>1]-0.075f*Dgrb[c][(indx+m1+v2)>>1] ))/(wtnw+wtne+wtsw+wtse); } #endif float temp; for (rr=16; rr>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1])); red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])* temp); blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])* temp); indx++; col++; red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]); blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]); } if(cc1&1) { // width of tile is odd col = cc + left; temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1])); red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])* temp); blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])* temp); } } else { for (cc=16,indx=rr*TS+cc,row=rr+top; cc>1]); blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]); indx++; col++; temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1])); red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])* temp); blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])* temp); } if(cc1&1) { // width of tile is odd col = cc + left; red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]); blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]); } } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // copy smoothed results back to image matrix for (rr=16; rr < rr1-16; rr++){ #ifdef __SSE2__ for (row=rr+top, cc=16; cc < cc1-19; cc+=4) { _mm_storeu_ps(&green[row][cc + left], LVF(rgbgreen[rr*TS+cc]) * c65535v); } #else for (row=rr+top, cc=16; cc < cc1-16; cc++) { col = cc + left; indx=rr*TS+cc; green[row][col] = ((65535.0f*rgbgreen[indx])); //for dcraw implementation //for (c=0; c<3; c++){ // image[indx][c] = CLIP((int)(65535.0f*rgb[rr*TS+cc][c] + 0.5f)); //} } #endif } //end of main loop if(plistener) { progresscounter++; if(progresscounter % 4 == 0) { #pragma omp critical { progress+=(double)4*((TS-32)*(TS-32))/(height*width); if (progress>1.0) { progress=1.0; } plistener->setProgress(progress); } } } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // clean up free(buffer); } if(plistener) plistener->setProgress(1.0); // done #undef TS } }