//////////////////////////////////////////////////////////////// // // AMaZE demosaic algorithm // (Aliasing Minimization and Zipper Elimination) // // copyright (c) 2008-2010 Emil Martinec // // incorporating ideas of Luis Sanz Rodrigues and Paul Lee // // code dated: May 27, 2010 // // amaze_interpolate_RT.cc is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. // //////////////////////////////////////////////////////////////// #include "rtengine.h" #include "rawimagesource.h" #include "rt_math.h" #include "../rtgui/multilangmgr.h" #include "procparams.h" #include "sleef.c" #include "opthelper.h" namespace rtengine { SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) { #define HCLIP(x) x //is this still necessary??? 
//min(clip_pt,x) int width = winw, height = winh; const float clip_pt = 1 / initialGain; const float clip_pt8 = 0.8f / initialGain; #define TS 160 // Tile size; the image is processed in square tiles to lower memory requirements and facilitate multi-threading #define TSH 80 // half of Tile size // local variables //offset of R pixel within a Bayer quartet int ex, ey; //shifts of pointer value to access pixels in vertical and diagonal directions static const int v1 = TS, v2 = 2 * TS, v3 = 3 * TS, p1 = -TS + 1, p2 = -2 * TS + 2, p3 = -3 * TS + 3, m1 = TS + 1, m2 = 2 * TS + 2, m3 = 3 * TS + 3; //tolerance to avoid dividing by zero static const float eps = 1e-5, epssq = 1e-10; //tolerance to avoid dividing by zero //adaptive ratios threshold static const float arthresh = 0.75; //nyquist texture test threshold static const float nyqthresh = 0.5; //gaussian on 5x5 quincunx, sigma=1.2 static const float gaussodd[4] = {0.14659727707323927f, 0.103592713382435f, 0.0732036125103057f, 0.0365543548389495f}; //gaussian on 5x5, sigma=1.2 static const float gaussgrad[6] = {0.07384411893421103f, 0.06207511968171489f, 0.0521818194747806f, 0.03687419286733595f, 0.03099732204057846f, 0.018413194161458882f }; //gaussian on 5x5 alt quincunx, sigma=1.5 static const float gausseven[2] = {0.13719494435797422f, 0.05640252782101291f}; //guassian on quincunx grid static const float gquinc[4] = {0.169917f, 0.108947f, 0.069855f, 0.0287182f}; volatile double progress = 0.0; // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Issue 1676 // Moved from inside the parallel section if (plistener) { plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::BayerSensor::methodstring[RAWParams::BayerSensor::amaze])); plistener->setProgress (0.0); } struct s_hv { float h; float v; }; #pragma omp parallel { int progresscounter = 0; //position of top/left corner of the tile int top, left; // beginning of storage block for tile char *buffer; // 
green values float (*rgbgreen); // sum of square of horizontal gradient and square of vertical gradient float (*delhvsqsum); // gradient based directional weights for interpolation float (*dirwts0); float (*dirwts1); // vertically interpolated color differences G-R, G-B float (*vcd); // horizontally interpolated color differences float (*hcd); // alternative vertical interpolation float (*vcdalt); // alternative horizontal interpolation float (*hcdalt); // square of average color difference float (*cddiffsq); // weight to give horizontal vs vertical interpolation float (*hvwt); // final interpolated color difference float (*Dgrb)[TS * TSH]; // float (*Dgrb)[2]; // gradient in plus (NE/SW) direction float (*delp); // gradient in minus (NW/SE) direction float (*delm); // diagonal interpolation of R+B float (*rbint); // horizontal and vertical curvature of interpolated G (used to refine interpolation in Nyquist texture regions) s_hv (*Dgrb2); // difference between up/down interpolations of G float (*dgintv); // difference between left/right interpolations of G float (*dginth); // diagonal (plus) color difference R-B or G1-G2 // float (*Dgrbp1); // diagonal (minus) color difference R-B or G1-G2 // float (*Dgrbm1); float (*Dgrbsq1m); float (*Dgrbsq1p); // s_mp (*Dgrbsq1); // square of diagonal color difference // float (*Dgrbpsq1); // square of diagonal color difference // float (*Dgrbmsq1); // tile raw data float (*cfa); // relative weight for combining plus and minus diagonal interpolations float (*pmwt); // interpolated color difference R-B in minus and plus direction float (*rbm); float (*rbp); // nyquist texture flag 1=nyquist, 0=not nyquist char (*nyquist); #define CLF 1 // assign working space buffer = (char *) calloc(22 * sizeof(float) * TS * TS + sizeof(char) * TS * TSH + 23 * CLF * 64 + 63, 1); char *data; data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64); //merror(buffer,"amaze_interpolate()"); rgbgreen = (float (*)) data; //pointers to array 
delhvsqsum = (float (*)) ((char*)rgbgreen + sizeof(float) * TS * TS + CLF * 64); dirwts0 = (float (*)) ((char*)delhvsqsum + sizeof(float) * TS * TS + CLF * 64); dirwts1 = (float (*)) ((char*)dirwts0 + sizeof(float) * TS * TS + CLF * 64); vcd = (float (*)) ((char*)dirwts1 + sizeof(float) * TS * TS + CLF * 64); hcd = (float (*)) ((char*)vcd + sizeof(float) * TS * TS + CLF * 64); vcdalt = (float (*)) ((char*)hcd + sizeof(float) * TS * TS + CLF * 64); hcdalt = (float (*)) ((char*)vcdalt + sizeof(float) * TS * TS + CLF * 64); cddiffsq = (float (*)) ((char*)hcdalt + sizeof(float) * TS * TS + CLF * 64); hvwt = (float (*)) ((char*)cddiffsq + sizeof(float) * TS * TS + CLF * 64); Dgrb = (float (*)[TS * TSH]) ((char*)hvwt + sizeof(float) * TS * TSH + CLF * 64); delp = (float (*)) ((char*)Dgrb + sizeof(float) * TS * TS + CLF * 64); delm = (float (*)) ((char*)delp + sizeof(float) * TS * TSH + CLF * 64); rbint = (float (*)) ((char*)delm + sizeof(float) * TS * TSH + CLF * 64); Dgrb2 = (s_hv (*)) ((char*)rbint + sizeof(float) * TS * TSH + CLF * 64); dgintv = (float (*)) ((char*)Dgrb2 + sizeof(float) * TS * TS + CLF * 64); dginth = (float (*)) ((char*)dgintv + sizeof(float) * TS * TS + CLF * 64); Dgrbsq1m = (float (*)) ((char*)dginth + sizeof(float) * TS * TS + CLF * 64); Dgrbsq1p = (float (*)) ((char*)Dgrbsq1m + sizeof(float) * TS * TSH + CLF * 64); cfa = (float (*)) ((char*)Dgrbsq1p + sizeof(float) * TS * TSH + CLF * 64); pmwt = (float (*)) ((char*)cfa + sizeof(float) * TS * TS + CLF * 64); rbm = (float (*)) ((char*)pmwt + sizeof(float) * TS * TSH + CLF * 64); rbp = (float (*)) ((char*)rbm + sizeof(float) * TS * TSH + CLF * 64); nyquist = (char (*)) ((char*)rbp + sizeof(float) * TS * TSH + CLF * 64); #undef CLF // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //determine GRBG coset; (ey,ex) is the offset of the R subarray if (FC(0, 0) == 1) { //first pixel is G if (FC(0, 1) == 0) { ey = 0; ex = 1; } else { ey = 1; ex = 0; } } else {//first pixel is R or B 
if (FC(0, 0) == 0) { ey = 0; ex = 0; } else { ey = 1; ex = 1; } } // Main algorithm: Tile loop //#pragma omp parallel for shared(rawData,height,width,red,green,blue) private(top,left) schedule(dynamic) //code is openmp ready; just have to pull local tile variable declarations inside the tile loop // Issue 1676 // use collapse(2) to collapse the 2 loops to one large loop, so there is better scaling #pragma omp for schedule(dynamic) collapse(2) nowait for (top = winy - 16; top < winy + height; top += TS - 32) for (left = winx - 16; left < winx + width; left += TS - 32) { memset(nyquist, 0, sizeof(char)*TS * TSH); memset(rbint, 0, sizeof(float)*TS * TSH); //location of tile bottom edge int bottom = min(top + TS, winy + height + 16); //location of tile right edge int right = min(left + TS, winx + width + 16); //tile width (=TS except for right edge of image) int rr1 = bottom - top; //tile height (=TS except for bottom edge of image) int cc1 = right - left; //tile vars //counters for pixel location in the image int row, col; //min and max row/column in the tile int rrmin, rrmax, ccmin, ccmax; //counters for pixel location within the tile int rr, cc; //color index 0=R, 1=G, 2=B int c; //pointer counters within the tile int indx, indx1; //dummy indices int i, j; //color ratios in up/down/left/right directions float cru, crd, crl, crr; //adaptive weights for vertical/horizontal/plus/minus directions float vwt, hwt, pwt, mwt; //vertical and horizontal G interpolations float Gintv, Ginth; //G interpolated in vert/hor directions using adaptive ratios float guar, gdar, glar, grar; //G interpolated in vert/hor directions using Hamilton-Adams method float guha, gdha, glha, grha; //interpolated G from fusing left/right or up/down float Ginthar, Ginthha, Gintvar, Gintvha; //color difference (G-R or G-B) variance in up/down/left/right directions float Dgrbvvaru, Dgrbvvard, Dgrbhvarl, Dgrbhvarr; float uave, dave, lave, rave; //color difference variances in vertical and horizontal 
directions float vcdvar, hcdvar, vcdvar1, hcdvar1, hcdaltvar, vcdaltvar; //adaptive interpolation weight using variance of color differences float varwt; // 639 - 644 //adaptive interpolation weight using difference of left-right and up-down G interpolations float diffwt; // 640 - 644 //alternative adaptive weight for combining horizontal/vertical interpolations float hvwtalt; // 745 - 748 //interpolation of G in four directions float gu, gd, gl, gr; //variance of G in vertical/horizontal directions float gvarh, gvarv; //Nyquist texture test float nyqtest; // 658 - 681 //accumulators for Nyquist texture interpolation float sumh, sumv, sumsqh, sumsqv, areawt; //color ratios in diagonal directions float crse, crnw, crne, crsw; //color differences in diagonal directions float rbse, rbnw, rbne, rbsw; //adaptive weights for combining diagonal interpolations float wtse, wtnw, wtsw, wtne; //alternate weight for combining diagonal interpolations float pmwtalt; // 885 - 888 //variance of R-B in plus/minus directions float rbvarm; // 843 - 848 // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // rgb from input CFA data // rgb values should be floating point number between 0 and 1 // after white balance multipliers are applied // a 16 pixel border is added to each side of the image // bookkeeping for borders if (top < winy) { rrmin = 16; } else { rrmin = 0; } if (left < winx) { ccmin = 16; } else { ccmin = 0; } if (bottom > (winy + height)) { rrmax = winy + height - top; } else { rrmax = rr1; } if (right > (winx + width)) { ccmax = winx + width - left; } else { ccmax = cc1; } #ifdef __SSE2__ const __m128 c65535v = _mm_set1_ps( 65535.0f ); __m128 tempv; for (rr = rrmin; rr < rrmax; rr++) { for (row = rr + top, cc = ccmin; cc < ccmax - 3; cc += 4) { indx1 = rr * TS + cc; tempv = LVFU(rawData[row][cc + left]) / c65535v; _mm_store_ps( &cfa[indx1], tempv ); _mm_store_ps( &rgbgreen[indx1], tempv ); } for (; cc < ccmax; cc++) { indx1 = rr * TS + cc; cfa[indx1] 
= (rawData[row][cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[indx1] = cfa[indx1]; } } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders if (rrmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = ccmin, row = 32 - rr + top; cc < ccmax; cc++) { cfa[rr * TS + cc] = (rawData[row][cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + cc] = cfa[rr * TS + cc]; } } } if (rrmax < rr1) { for (rr = 0; rr < 16; rr++) for (cc = ccmin; cc < ccmax; cc += 4) { indx1 = (rrmax + rr) * TS + cc; tempv = LVFU(rawData[(winy + height - rr - 2)][left + cc]) / c65535v; _mm_store_ps( &cfa[indx1], tempv ); _mm_store_ps( &rgbgreen[indx1], tempv ); } } if (ccmin > 0) { for (rr = rrmin; rr < rrmax; rr++) for (cc = 0, row = rr + top; cc < 16; cc++) { cfa[rr * TS + cc] = (rawData[row][32 - cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + cc] = cfa[rr * TS + cc]; } } } if (ccmax < cc1) { for (rr = rrmin; rr < rrmax; rr++) for (cc = 0; cc < 16; cc++) { cfa[rr * TS + ccmax + cc] = (rawData[(top + rr)][(winx + width - cc - 2)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + ccmax + cc] = cfa[rr * TS + ccmax + cc]; } } } //also, fill the image corners if (rrmin > 0 && ccmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc += 4) { indx1 = (rr) * TS + cc; tempv = LVFU(rawData[winy + 32 - rr][winx + 32 - cc]) / c65535v; _mm_store_ps( &cfa[indx1], tempv ); _mm_store_ps( &rgbgreen[indx1], tempv ); } } if (rrmax < rr1 && ccmax < cc1) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc += 4) { indx1 = (rrmax + rr) * TS + ccmax + cc; tempv = LVFU(rawData[(winy + height - rr - 2)][(winx + width - cc - 2)]) / c65535v; _mm_storeu_ps( &cfa[indx1], tempv ); _mm_storeu_ps( &rgbgreen[indx1], tempv ); } } if (rrmin > 0 && ccmax < cc1) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rr)*TS + ccmax + cc] = (rawData[(winy + 32 - rr)][(winx + width - cc - 2)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rr)*TS + ccmax + cc] = cfa[(rr) * TS + ccmax + 
cc]; } } } if (rrmax < rr1 && ccmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rrmax + rr)*TS + cc] = (rawData[(winy + height - rr - 2)][(winx + 32 - cc)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rrmax + rr)*TS + cc] = cfa[(rrmax + rr) * TS + cc]; } } } #else for (rr = rrmin; rr < rrmax; rr++) for (row = rr + top, cc = ccmin; cc < ccmax; cc++) { indx1 = rr * TS + cc; cfa[indx1] = (rawData[row][cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[indx1] = cfa[indx1]; } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fill borders if (rrmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = ccmin, row = 32 - rr + top; cc < ccmax; cc++) { cfa[rr * TS + cc] = (rawData[row][cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + cc] = cfa[rr * TS + cc]; } } } if (rrmax < rr1) { for (rr = 0; rr < 16; rr++) for (cc = ccmin; cc < ccmax; cc++) { cfa[(rrmax + rr)*TS + cc] = (rawData[(winy + height - rr - 2)][left + cc]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rrmax + rr)*TS + cc] = cfa[(rrmax + rr) * TS + cc]; } } } if (ccmin > 0) { for (rr = rrmin; rr < rrmax; rr++) for (cc = 0, row = rr + top; cc < 16; cc++) { cfa[rr * TS + cc] = (rawData[row][32 - cc + left]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + cc] = cfa[rr * TS + cc]; } } } if (ccmax < cc1) { for (rr = rrmin; rr < rrmax; rr++) for (cc = 0; cc < 16; cc++) { cfa[rr * TS + ccmax + cc] = (rawData[(top + rr)][(winx + width - cc - 2)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[rr * TS + ccmax + cc] = cfa[rr * TS + ccmax + cc]; } } } //also, fill the image corners if (rrmin > 0 && ccmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rr)*TS + cc] = (rawData[winy + 32 - rr][winx + 32 - cc]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rr)*TS + cc] = cfa[(rr) * TS + cc]; } } } if (rrmax < rr1 && ccmax < cc1) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rrmax + rr)*TS + ccmax + cc] = (rawData[(winy + height - rr - 2)][(winx + width - cc - 2)]) 
/ 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rrmax + rr)*TS + ccmax + cc] = cfa[(rrmax + rr) * TS + ccmax + cc]; } } } if (rrmin > 0 && ccmax < cc1) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rr)*TS + ccmax + cc] = (rawData[(winy + 32 - rr)][(winx + width - cc - 2)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rr)*TS + ccmax + cc] = cfa[(rr) * TS + ccmax + cc]; } } } if (rrmax < rr1 && ccmin > 0) { for (rr = 0; rr < 16; rr++) for (cc = 0; cc < 16; cc++) { cfa[(rrmax + rr)*TS + cc] = (rawData[(winy + height - rr - 2)][(winx + 32 - cc)]) / 65535.0f; if(FC(rr, cc) == 1) { rgbgreen[(rrmax + rr)*TS + cc] = cfa[(rrmax + rr) * TS + cc]; } } } #endif //end of border fill // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #ifdef __SSE2__ __m128 delhv, delvv; const __m128 epsv = _mm_set1_ps( eps ); for (rr = 2; rr < rr1 - 2; rr++) { for (cc = 0, indx = (rr) * TS + cc; cc < cc1; cc += 4, indx += 4) { delhv = vabsf( LVFU( cfa[indx + 1] ) - LVFU( cfa[indx - 1] ) ); delvv = vabsf( LVF( cfa[indx + v1] ) - LVF( cfa[indx - v1] ) ); _mm_store_ps( &dirwts1[indx], epsv + vabsf( LVFU( cfa[indx + 2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVFU( cfa[indx - 2] )) + delhv ); delhv = delhv * delhv; _mm_store_ps( &dirwts0[indx], epsv + vabsf( LVF( cfa[indx + v2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVF( cfa[indx - v2] )) + delvv ); delvv = delvv * delvv; _mm_store_ps( &delhvsqsum[indx], delhv + delvv); } } #else // horizontal and vedrtical gradient float delh, delv; for (rr = 2; rr < rr1 - 2; rr++) for (cc = 2, indx = (rr) * TS + cc; cc < cc1 - 2; cc++, indx++) { delh = fabsf(cfa[indx + 1] - cfa[indx - 1]); delv = fabsf(cfa[indx + v1] - cfa[indx - v1]); dirwts0[indx] = eps + fabsf(cfa[indx + v2] - cfa[indx]) + fabsf(cfa[indx] - cfa[indx - v2]) + delv; dirwts1[indx] = eps + fabsf(cfa[indx + 2] - cfa[indx]) + fabsf(cfa[indx] - cfa[indx - 2]) + delh; //+fabsf(cfa[indx+2]-cfa[indx-2]); delhvsqsum[indx] = SQR(delh) + SQR(delv); } #endif #ifdef __SSE2__ __m128 
Dgrbsq1pv, Dgrbsq1mv, temp2v; for (rr = 6; rr < rr1 - 6; rr++) { if((FC(rr, 2) & 1) == 0) { for (cc = 6, indx = (rr) * TS + cc; cc < cc1 - 6; cc += 8, indx += 8) { tempv = LC2VFU(cfa[indx + 1]); Dgrbsq1pv = (SQRV(tempv - LC2VFU(cfa[indx + 1 - p1])) + SQRV(tempv - LC2VFU(cfa[indx + 1 + p1]))); _mm_storeu_ps( &delp[indx >> 1], vabsf(LC2VFU(cfa[indx + p1]) - LC2VFU(cfa[indx - p1]))); _mm_storeu_ps( &delm[indx >> 1], vabsf(LC2VFU(cfa[indx + m1]) - LC2VFU(cfa[indx - m1]))); Dgrbsq1mv = (SQRV(tempv - LC2VFU(cfa[indx + 1 - m1])) + SQRV(tempv - LC2VFU(cfa[indx + 1 + m1]))); _mm_storeu_ps( &Dgrbsq1m[indx >> 1], Dgrbsq1mv ); _mm_storeu_ps( &Dgrbsq1p[indx >> 1], Dgrbsq1pv ); } } else { for (cc = 6, indx = (rr) * TS + cc; cc < cc1 - 6; cc += 8, indx += 8) { tempv = LC2VFU(cfa[indx]); Dgrbsq1pv = (SQRV(tempv - LC2VFU(cfa[indx - p1])) + SQRV(tempv - LC2VFU(cfa[indx + p1]))); _mm_storeu_ps( &delp[indx >> 1], vabsf(LC2VFU(cfa[indx + 1 + p1]) - LC2VFU(cfa[indx + 1 - p1]))); _mm_storeu_ps( &delm[indx >> 1], vabsf(LC2VFU(cfa[indx + 1 + m1]) - LC2VFU(cfa[indx + 1 - m1]))); Dgrbsq1mv = (SQRV(tempv - LC2VFU(cfa[indx - m1])) + SQRV(tempv - LC2VFU(cfa[indx + m1]))); _mm_storeu_ps( &Dgrbsq1m[indx >> 1], Dgrbsq1mv ); _mm_storeu_ps( &Dgrbsq1p[indx >> 1], Dgrbsq1pv ); } } } #else for (rr = 6; rr < rr1 - 6; rr++) { if((FC(rr, 2) & 1) == 0) { for (cc = 6, indx = (rr) * TS + cc; cc < cc1 - 6; cc += 2, indx += 2) { delp[indx >> 1] = fabsf(cfa[indx + p1] - cfa[indx - p1]); delm[indx >> 1] = fabsf(cfa[indx + m1] - cfa[indx - m1]); Dgrbsq1p[indx >> 1] = (SQR(cfa[indx + 1] - cfa[indx + 1 - p1]) + SQR(cfa[indx + 1] - cfa[indx + 1 + p1])); Dgrbsq1m[indx >> 1] = (SQR(cfa[indx + 1] - cfa[indx + 1 - m1]) + SQR(cfa[indx + 1] - cfa[indx + 1 + m1])); } } else { for (cc = 6, indx = (rr) * TS + cc; cc < cc1 - 6; cc += 2, indx += 2) { Dgrbsq1p[indx >> 1] = (SQR(cfa[indx] - cfa[indx - p1]) + SQR(cfa[indx] - cfa[indx + p1])); Dgrbsq1m[indx >> 1] = (SQR(cfa[indx] - cfa[indx - m1]) + SQR(cfa[indx] - cfa[indx + 
m1])); delp[indx >> 1] = fabsf(cfa[indx + 1 + p1] - cfa[indx + 1 - p1]); delm[indx >> 1] = fabsf(cfa[indx + 1 + m1] - cfa[indx + 1 - m1]); } } } #endif // end of tile initialization // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //interpolate vertical and horizontal color differences #ifdef __SSE2__ __m128 sgnv, cruv, crdv, crlv, crrv, guhav, gdhav, glhav, grhav, hwtv, vwtv, Gintvhav, Ginthhav, guarv, gdarv, glarv, grarv; vmask clipmask; if( !(FC(4, 4) & 1) ) { sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f ); } else { sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f ); } __m128 zd5v = _mm_set1_ps( 0.5f ); __m128 onev = _mm_set1_ps( 1.0f ); __m128 arthreshv = _mm_set1_ps( arthresh ); __m128 clip_pt8v = _mm_set1_ps( clip_pt8 ); for (rr = 4; rr < rr1 - 4; rr++) { sgnv = -sgnv; for (cc = 4, indx = rr * TS + cc; cc < cc1 - 7; cc += 4, indx += 4) { //color ratios in each cardinal direction cruv = LVF(cfa[indx - v1]) * (LVF(dirwts0[indx - v2]) + LVF(dirwts0[indx])) / (LVF(dirwts0[indx - v2]) * (epsv + LVF(cfa[indx])) + LVF(dirwts0[indx]) * (epsv + LVF(cfa[indx - v2]))); crdv = LVF(cfa[indx + v1]) * (LVF(dirwts0[indx + v2]) + LVF(dirwts0[indx])) / (LVF(dirwts0[indx + v2]) * (epsv + LVF(cfa[indx])) + LVF(dirwts0[indx]) * (epsv + LVF(cfa[indx + v2]))); crlv = LVFU(cfa[indx - 1]) * (LVFU(dirwts1[indx - 2]) + LVF(dirwts1[indx])) / (LVFU(dirwts1[indx - 2]) * (epsv + LVF(cfa[indx])) + LVF(dirwts1[indx]) * (epsv + LVFU(cfa[indx - 2]))); crrv = LVFU(cfa[indx + 1]) * (LVFU(dirwts1[indx + 2]) + LVF(dirwts1[indx])) / (LVFU(dirwts1[indx + 2]) * (epsv + LVF(cfa[indx])) + LVF(dirwts1[indx]) * (epsv + LVFU(cfa[indx + 2]))); guhav = LVF(cfa[indx - v1]) + zd5v * (LVF(cfa[indx]) - LVF(cfa[indx - v2])); gdhav = LVF(cfa[indx + v1]) + zd5v * (LVF(cfa[indx]) - LVF(cfa[indx + v2])); glhav = LVFU(cfa[indx - 1]) + zd5v * (LVF(cfa[indx]) - LVFU(cfa[indx - 2])); grhav = LVFU(cfa[indx + 1]) + zd5v * (LVF(cfa[indx]) - LVFU(cfa[indx + 2])); guarv = vself(vmaskf_lt(vabsf(onev - cruv), 
arthreshv), LVF(cfa[indx]) * cruv, guhav); gdarv = vself(vmaskf_lt(vabsf(onev - crdv), arthreshv), LVF(cfa[indx]) * crdv, gdhav); glarv = vself(vmaskf_lt(vabsf(onev - crlv), arthreshv), LVF(cfa[indx]) * crlv, glhav); grarv = vself(vmaskf_lt(vabsf(onev - crrv), arthreshv), LVF(cfa[indx]) * crrv, grhav); hwtv = LVFU(dirwts1[indx - 1]) / (LVFU(dirwts1[indx - 1]) + LVFU(dirwts1[indx + 1])); vwtv = LVF(dirwts0[indx - v1]) / (LVF(dirwts0[indx + v1]) + LVF(dirwts0[indx - v1])); //interpolated G via adaptive weights of cardinal evaluations Ginthhav = hwtv * grhav + (onev - hwtv) * glhav; Gintvhav = vwtv * gdhav + (onev - vwtv) * guhav; //interpolated color differences _mm_store_ps( &hcdalt[indx], sgnv * (Ginthhav - LVF(cfa[indx]))); _mm_store_ps( &vcdalt[indx], sgnv * (Gintvhav - LVF(cfa[indx]))); clipmask = vorm( vorm( vmaskf_gt( LVF(cfa[indx]), clip_pt8v ), vmaskf_gt( Gintvhav, clip_pt8v ) ), vmaskf_gt( Ginthhav, clip_pt8v )); guarv = vself( clipmask, guhav, guarv); gdarv = vself( clipmask, gdhav, gdarv); glarv = vself( clipmask, glhav, glarv); grarv = vself( clipmask, grhav, grarv); _mm_store_ps( &vcd[indx], vself( clipmask, LVF(vcdalt[indx]), sgnv * ((vwtv * gdarv + (onev - vwtv)*guarv) - LVF(cfa[indx])))); _mm_store_ps( &hcd[indx], vself( clipmask, LVF(hcdalt[indx]), sgnv * ((hwtv * grarv + (onev - hwtv)*glarv) - LVF(cfa[indx])))); //differences of interpolations in opposite directions _mm_store_ps(&dgintv[indx], _mm_min_ps(SQRV(guhav - gdhav), SQRV(guarv - gdarv))); _mm_store_ps(&dginth[indx], _mm_min_ps(SQRV(glhav - grhav), SQRV(glarv - grarv))); } } #else bool fcswitch; for (rr = 4; rr < rr1 - 4; rr++) { for (cc = 4, indx = rr * TS + cc, fcswitch = FC(rr, cc) & 1; cc < cc1 - 4; cc++, indx++) { //color ratios in each cardinal direction cru = cfa[indx - v1] * (dirwts0[indx - v2] + dirwts0[indx]) / (dirwts0[indx - v2] * (eps + cfa[indx]) + dirwts0[indx] * (eps + cfa[indx - v2])); crd = cfa[indx + v1] * (dirwts0[indx + v2] + dirwts0[indx]) / (dirwts0[indx + v2] * (eps 
+ cfa[indx]) + dirwts0[indx] * (eps + cfa[indx + v2])); crl = cfa[indx - 1] * (dirwts1[indx - 2] + dirwts1[indx]) / (dirwts1[indx - 2] * (eps + cfa[indx]) + dirwts1[indx] * (eps + cfa[indx - 2])); crr = cfa[indx + 1] * (dirwts1[indx + 2] + dirwts1[indx]) / (dirwts1[indx + 2] * (eps + cfa[indx]) + dirwts1[indx] * (eps + cfa[indx + 2])); guha = HCLIP(cfa[indx - v1]) + xdiv2f(cfa[indx] - cfa[indx - v2]); gdha = HCLIP(cfa[indx + v1]) + xdiv2f(cfa[indx] - cfa[indx + v2]); glha = HCLIP(cfa[indx - 1]) + xdiv2f(cfa[indx] - cfa[indx - 2]); grha = HCLIP(cfa[indx + 1]) + xdiv2f(cfa[indx] - cfa[indx + 2]); if (fabsf(1.0f - cru) < arthresh) { guar = cfa[indx] * cru; } else { guar = guha; } if (fabsf(1.0f - crd) < arthresh) { gdar = cfa[indx] * crd; } else { gdar = gdha; } if (fabsf(1.0f - crl) < arthresh) { glar = cfa[indx] * crl; } else { glar = glha; } if (fabsf(1.0f - crr) < arthresh) { grar = cfa[indx] * crr; } else { grar = grha; } hwt = dirwts1[indx - 1] / (dirwts1[indx - 1] + dirwts1[indx + 1]); vwt = dirwts0[indx - v1] / (dirwts0[indx + v1] + dirwts0[indx - v1]); //interpolated G via adaptive weights of cardinal evaluations Gintvha = vwt * gdha + (1.0f - vwt) * guha; Ginthha = hwt * grha + (1.0f - hwt) * glha; //interpolated color differences if (fcswitch) { vcd[indx] = cfa[indx] - (vwt * gdar + (1.0f - vwt) * guar); hcd[indx] = cfa[indx] - (hwt * grar + (1.0f - hwt) * glar); vcdalt[indx] = cfa[indx] - Gintvha; hcdalt[indx] = cfa[indx] - Ginthha; } else { //interpolated color differences vcd[indx] = (vwt * gdar + (1.0f - vwt) * guar) - cfa[indx]; hcd[indx] = (hwt * grar + (1.0f - hwt) * glar) - cfa[indx]; vcdalt[indx] = Gintvha - cfa[indx]; hcdalt[indx] = Ginthha - cfa[indx]; } fcswitch = !fcswitch; if (cfa[indx] > clip_pt8 || Gintvha > clip_pt8 || Ginthha > clip_pt8) { //use HA if highlights are (nearly) clipped guar = guha; gdar = gdha; glar = glha; grar = grha; vcd[indx] = vcdalt[indx]; hcd[indx] = hcdalt[indx]; } //differences of interpolations in opposite 
directions dgintv[indx] = min(SQR(guha - gdha), SQR(guar - gdar)); dginth[indx] = min(SQR(glha - grha), SQR(glar - grar)); } } #endif #ifdef __SSE2__ __m128 hcdvarv, vcdvarv; __m128 hcdaltvarv, vcdaltvarv, hcdv, vcdv, hcdaltv, vcdaltv, sgn3v, Ginthv, Gintvv, hcdoldv, vcdoldv; __m128 threev = _mm_set1_ps( 3.0f ); __m128 clip_ptv = _mm_set1_ps( clip_pt ); __m128 nsgnv; vmask hcdmask, vcdmask, tempmask; if( !(FC(4, 4) & 1) ) { sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f ); } else { sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f ); } sgn3v = threev * sgnv; for (rr = 4; rr < rr1 - 4; rr++) { nsgnv = sgnv; sgnv = -sgnv; sgn3v = -sgn3v; for (cc = 4, indx = rr * TS + cc, c = FC(rr, cc) & 1; cc < cc1 - 4; cc += 4, indx += 4) { hcdv = LVF( hcd[indx] ); hcdvarv = threev * (SQRV(LVFU(hcd[indx - 2])) + SQRV(hcdv) + SQRV(LVFU(hcd[indx + 2]))) - SQRV(LVFU(hcd[indx - 2]) + hcdv + LVFU(hcd[indx + 2])); hcdaltv = LVF( hcdalt[indx] ); hcdaltvarv = threev * (SQRV(LVFU(hcdalt[indx - 2])) + SQRV(hcdaltv) + SQRV(LVFU(hcdalt[indx + 2]))) - SQRV(LVFU(hcdalt[indx - 2]) + hcdaltv + LVFU(hcdalt[indx + 2])); vcdv = LVF( vcd[indx] ); vcdvarv = threev * (SQRV(LVF(vcd[indx - v2])) + SQRV(vcdv) + SQRV(LVF(vcd[indx + v2]))) - SQRV(LVF(vcd[indx - v2]) + vcdv + LVF(vcd[indx + v2])); vcdaltv = LVF( vcdalt[indx] ); vcdaltvarv = threev * (SQRV(LVF(vcdalt[indx - v2])) + SQRV(vcdaltv) + SQRV(LVF(vcdalt[indx + v2]))) - SQRV(LVF(vcdalt[indx - v2]) + vcdaltv + LVF(vcdalt[indx + v2])); //choose the smallest variance; this yields a smoother interpolation hcdv = vself( vmaskf_lt( hcdaltvarv, hcdvarv ), hcdaltv, hcdv); vcdv = vself( vmaskf_lt( vcdaltvarv, vcdvarv ), vcdaltv, vcdv); Ginthv = sgnv * hcdv + LVF( cfa[indx] ); temp2v = sgn3v * hcdv; hwtv = onev + temp2v / ( epsv + Ginthv + LVF( cfa[indx])); hcdmask = vmaskf_gt( nsgnv * hcdv, ZEROV ); hcdoldv = hcdv; tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Ginthv, LVFU(cfa[indx - 1]), LVFU(cfa[indx + 1]) )); hcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx]) + 
Ginthv)), tempv, hwtv * hcdv + (onev - hwtv) * tempv); hcdv = vself( hcdmask, hcdv, hcdoldv ); hcdv = vself( vmaskf_gt( Ginthv, clip_ptv), tempv, hcdv); _mm_store_ps( &hcd[indx], hcdv); Gintvv = sgnv * vcdv + LVF( cfa[indx] ); temp2v = sgn3v * vcdv; vwtv = onev + temp2v / ( epsv + Gintvv + LVF( cfa[indx])); vcdmask = vmaskf_gt( nsgnv * vcdv, ZEROV ); vcdoldv = vcdv; tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Gintvv, LVF(cfa[indx - v1]), LVF(cfa[indx + v1]) )); vcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx]) + Gintvv)), tempv, vwtv * vcdv + (onev - vwtv) * tempv); vcdv = vself( vcdmask, vcdv, vcdoldv ); vcdv = vself( vmaskf_gt( Gintvv, clip_ptv), tempv, vcdv); _mm_store_ps( &vcd[indx], vcdv); _mm_storeu_ps(&cddiffsq[indx], SQRV(vcdv - hcdv)); } } #else for (rr = 4; rr < rr1 - 4; rr++) { //for (cc=4+(FC(rr,2)&1),indx=rr*TS+cc,c=FC(rr,cc); cc 0) { if (3.0f * hcd[indx] > (Ginth + cfa[indx])) { hcd[indx] = -ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx]; } else { hwt = 1.0f - 3.0f * hcd[indx] / (eps + Ginth + cfa[indx]); hcd[indx] = hwt * hcd[indx] + (1.0f - hwt) * (-ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx]); } } if (vcd[indx] > 0) { if (3.0f * vcd[indx] > (Gintv + cfa[indx])) { vcd[indx] = -ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx]; } else { vwt = 1.0f - 3.0f * vcd[indx] / (eps + Gintv + cfa[indx]); vcd[indx] = vwt * vcd[indx] + (1.0f - vwt) * (-ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx]); } } if (Ginth > clip_pt) { hcd[indx] = -ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx]; //for RT implementation } if (Gintv > clip_pt) { vcd[indx] = -ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx]; } //if (Ginth > pre_mul[c]) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for dcraw implementation //if (Gintv > pre_mul[c]) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]; } else {//R or B site Ginth = hcd[indx] + cfa[indx]; //interpolated G Gintv = vcd[indx] + cfa[indx]; if (hcd[indx] < 0) { if 
(3.0f * hcd[indx] < -(Ginth + cfa[indx])) { hcd[indx] = ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx]; } else { hwt = 1.0f + 3.0f * hcd[indx] / (eps + Ginth + cfa[indx]); hcd[indx] = hwt * hcd[indx] + (1.0f - hwt) * (ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx]); } } if (vcd[indx] < 0) { if (3.0f * vcd[indx] < -(Gintv + cfa[indx])) { vcd[indx] = ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx]; } else { vwt = 1.0f + 3.0f * vcd[indx] / (eps + Gintv + cfa[indx]); vcd[indx] = vwt * vcd[indx] + (1.0f - vwt) * (ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx]); } } if (Ginth > clip_pt) { hcd[indx] = ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx]; //for RT implementation } if (Gintv > clip_pt) { vcd[indx] = ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx]; } //if (Ginth > pre_mul[c]) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for dcraw implementation //if (Gintv > pre_mul[c]) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]; cddiffsq[indx] = SQR(vcd[indx] - hcd[indx]); } c = !c; } } #endif #ifdef __SSE2__ __m128 uavev, davev, lavev, ravev, Dgrbvvaruv, Dgrbvvardv, Dgrbhvarlv, Dgrbhvarrv, varwtv, diffwtv, vcdvar1v, hcdvar1v; __m128 epssqv = _mm_set1_ps( epssq ); vmask decmask; for (rr = 6; rr < rr1 - 6; rr++) { for (cc = 6 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 6; cc += 8, indx += 8) { //compute color difference variances in cardinal directions tempv = LC2VFU(vcd[indx]); uavev = tempv + LC2VFU(vcd[indx - v1]) + LC2VFU(vcd[indx - v2]) + LC2VFU(vcd[indx - v3]); davev = tempv + LC2VFU(vcd[indx + v1]) + LC2VFU(vcd[indx + v2]) + LC2VFU(vcd[indx + v3]); Dgrbvvaruv = SQRV(tempv - uavev) + SQRV(LC2VFU(vcd[indx - v1]) - uavev) + SQRV(LC2VFU(vcd[indx - v2]) - uavev) + SQRV(LC2VFU(vcd[indx - v3]) - uavev); Dgrbvvardv = SQRV(tempv - davev) + SQRV(LC2VFU(vcd[indx + v1]) - davev) + SQRV(LC2VFU(vcd[indx + v2]) - davev) + SQRV(LC2VFU(vcd[indx + v3]) - davev); hwtv = LC2VFU(dirwts1[indx - 1]) / 
(LC2VFU(dirwts1[indx - 1]) + LC2VFU(dirwts1[indx + 1])); vwtv = LC2VFU(dirwts0[indx - v1]) / (LC2VFU(dirwts0[indx + v1]) + LC2VFU(dirwts0[indx - v1])); tempv = LC2VFU(hcd[indx]); lavev = tempv + LC2VFU(hcd[indx - 1]) + LC2VFU(hcd[indx - 2]) + LC2VFU(hcd[indx - 3]); ravev = tempv + LC2VFU(hcd[indx + 1]) + LC2VFU(hcd[indx + 2]) + LC2VFU(hcd[indx + 3]); Dgrbhvarlv = SQRV(tempv - lavev) + SQRV(LC2VFU(hcd[indx - 1]) - lavev) + SQRV(LC2VFU(hcd[indx - 2]) - lavev) + SQRV(LC2VFU(hcd[indx - 3]) - lavev); Dgrbhvarrv = SQRV(tempv - ravev) + SQRV(LC2VFU(hcd[indx + 1]) - ravev) + SQRV(LC2VFU(hcd[indx + 2]) - ravev) + SQRV(LC2VFU(hcd[indx + 3]) - ravev); vcdvarv = epssqv + vwtv * Dgrbvvardv + (onev - vwtv) * Dgrbvvaruv; hcdvarv = epssqv + hwtv * Dgrbhvarrv + (onev - hwtv) * Dgrbhvarlv; //compute fluctuations in up/down and left/right interpolations of colors Dgrbvvaruv = (LC2VFU(dgintv[indx])) + (LC2VFU(dgintv[indx - v1])) + (LC2VFU(dgintv[indx - v2])); Dgrbvvardv = (LC2VFU(dgintv[indx])) + (LC2VFU(dgintv[indx + v1])) + (LC2VFU(dgintv[indx + v2])); Dgrbhvarlv = (LC2VFU(dginth[indx])) + (LC2VFU(dginth[indx - 1])) + (LC2VFU(dginth[indx - 2])); Dgrbhvarrv = (LC2VFU(dginth[indx])) + (LC2VFU(dginth[indx + 1])) + (LC2VFU(dginth[indx + 2])); vcdvar1v = epssqv + vwtv * Dgrbvvardv + (onev - vwtv) * Dgrbvvaruv; hcdvar1v = epssqv + hwtv * Dgrbhvarrv + (onev - hwtv) * Dgrbhvarlv; //determine adaptive weights for G interpolation varwtv = hcdvarv / (vcdvarv + hcdvarv); diffwtv = hcdvar1v / (vcdvar1v + hcdvar1v); //if both agree on interpolation direction, choose the one with strongest directional discrimination; //otherwise, choose the u/d and l/r difference fluctuation weights decmask = vandm( vmaskf_gt( (zd5v - varwtv) * (zd5v - diffwtv), ZEROV ), vmaskf_lt( vabsf( zd5v - diffwtv), vabsf( zd5v - varwtv) ) ); _mm_storeu_ps( &hvwt[indx >> 1], vself( decmask, varwtv, diffwtv)); } } #else for (rr = 6; rr < rr1 - 6; rr++) { for (cc = 6 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 6; cc += 
2, indx += 2) { //compute color difference variances in cardinal directions uave = vcd[indx] + vcd[indx - v1] + vcd[indx - v2] + vcd[indx - v3]; dave = vcd[indx] + vcd[indx + v1] + vcd[indx + v2] + vcd[indx + v3]; lave = hcd[indx] + hcd[indx - 1] + hcd[indx - 2] + hcd[indx - 3]; rave = hcd[indx] + hcd[indx + 1] + hcd[indx + 2] + hcd[indx + 3]; Dgrbvvaru = SQR(vcd[indx] - uave) + SQR(vcd[indx - v1] - uave) + SQR(vcd[indx - v2] - uave) + SQR(vcd[indx - v3] - uave); Dgrbvvard = SQR(vcd[indx] - dave) + SQR(vcd[indx + v1] - dave) + SQR(vcd[indx + v2] - dave) + SQR(vcd[indx + v3] - dave); Dgrbhvarl = SQR(hcd[indx] - lave) + SQR(hcd[indx - 1] - lave) + SQR(hcd[indx - 2] - lave) + SQR(hcd[indx - 3] - lave); Dgrbhvarr = SQR(hcd[indx] - rave) + SQR(hcd[indx + 1] - rave) + SQR(hcd[indx + 2] - rave) + SQR(hcd[indx + 3] - rave); hwt = dirwts1[indx - 1] / (dirwts1[indx - 1] + dirwts1[indx + 1]); vwt = dirwts0[indx - v1] / (dirwts0[indx + v1] + dirwts0[indx - v1]); vcdvar = epssq + vwt * Dgrbvvard + (1.0f - vwt) * Dgrbvvaru; hcdvar = epssq + hwt * Dgrbhvarr + (1.0f - hwt) * Dgrbhvarl; //compute fluctuations in up/down and left/right interpolations of colors Dgrbvvaru = (dgintv[indx]) + (dgintv[indx - v1]) + (dgintv[indx - v2]); Dgrbvvard = (dgintv[indx]) + (dgintv[indx + v1]) + (dgintv[indx + v2]); Dgrbhvarl = (dginth[indx]) + (dginth[indx - 1]) + (dginth[indx - 2]); Dgrbhvarr = (dginth[indx]) + (dginth[indx + 1]) + (dginth[indx + 2]); vcdvar1 = epssq + vwt * Dgrbvvard + (1.0f - vwt) * Dgrbvvaru; hcdvar1 = epssq + hwt * Dgrbhvarr + (1.0f - hwt) * Dgrbhvarl; //determine adaptive weights for G interpolation varwt = hcdvar / (vcdvar + hcdvar); diffwt = hcdvar1 / (vcdvar1 + hcdvar1); //if both agree on interpolation direction, choose the one with strongest directional discrimination; //otherwise, choose the u/d and l/r difference fluctuation weights if ((0.5 - varwt) * (0.5 - diffwt) > 0 && fabsf(0.5 - diffwt) < fabsf(0.5 - varwt)) { hvwt[indx >> 1] = varwt; } else { hvwt[indx >> 1] 
= diffwt; } //hvwt[indx]=varwt; } } #endif // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Nyquist test for (rr = 6; rr < rr1 - 6; rr++) for (cc = 6 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 6; cc += 2, indx += 2) { //nyquist texture test: ask if difference of vcd compared to hcd is larger or smaller than RGGB gradients nyqtest = (gaussodd[0] * cddiffsq[indx] + gaussodd[1] * (cddiffsq[(indx - m1)] + cddiffsq[(indx + p1)] + cddiffsq[(indx - p1)] + cddiffsq[(indx + m1)]) + gaussodd[2] * (cddiffsq[(indx - v2)] + cddiffsq[(indx - 2)] + cddiffsq[(indx + 2)] + cddiffsq[(indx + v2)]) + gaussodd[3] * (cddiffsq[(indx - m2)] + cddiffsq[(indx + p2)] + cddiffsq[(indx - p2)] + cddiffsq[(indx + m2)])); nyqtest -= nyqthresh * (gaussgrad[0] * (delhvsqsum[indx]) + gaussgrad[1] * (delhvsqsum[indx - v1] + delhvsqsum[indx + 1] + delhvsqsum[indx - 1] + delhvsqsum[indx + v1]) + gaussgrad[2] * (delhvsqsum[indx - m1] + delhvsqsum[indx + p1] + delhvsqsum[indx - p1] + delhvsqsum[indx + m1]) + gaussgrad[3] * (delhvsqsum[indx - v2] + delhvsqsum[indx - 2] + delhvsqsum[indx + 2] + delhvsqsum[indx + v2]) + gaussgrad[4] * (delhvsqsum[indx - 2 * TS - 1] + delhvsqsum[indx - 2 * TS + 1] + delhvsqsum[indx - TS - 2] + delhvsqsum[indx - TS + 2] + delhvsqsum[indx + TS - 2] + delhvsqsum[indx + TS + 2] + delhvsqsum[indx + 2 * TS - 1] + delhvsqsum[indx + 2 * TS + 1]) + gaussgrad[5] * (delhvsqsum[indx - m2] + delhvsqsum[indx + p2] + delhvsqsum[indx - p2] + delhvsqsum[indx + m2])); if (nyqtest > 0) { nyquist[indx >> 1] = 1; //nyquist=1 for nyquist region } } unsigned int nyquisttemp; for (rr = 8; rr < rr1 - 8; rr++) { for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 8; cc += 2, indx += 2) { nyquisttemp = (nyquist[(indx - v2) >> 1] + nyquist[(indx - m1) >> 1] + nyquist[(indx + p1) >> 1] + nyquist[(indx - 2) >> 1] + nyquist[indx >> 1] + nyquist[(indx + 2) >> 1] + nyquist[(indx - p1) >> 1] + nyquist[(indx + m1) >> 1] + nyquist[(indx + v2) >> 1]); //if most of 
your neighbors are named Nyquist, it's likely that you're one too if (nyquisttemp > 4) { nyquist[indx >> 1] = 1; } //or not if (nyquisttemp < 4) { nyquist[indx >> 1] = 0; } } } // end of Nyquist test // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // in areas of Nyquist texture, do area interpolation for (rr = 8; rr < rr1 - 8; rr++) for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 8; cc += 2, indx += 2) { if (nyquist[indx >> 1]) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // area interpolation sumh = sumv = sumsqh = sumsqv = areawt = 0; for (i = -6; i < 7; i += 2) for (j = -6; j < 7; j += 2) { indx1 = (rr + i) * TS + cc + j; if (nyquist[indx1 >> 1]) { sumh += cfa[indx1] - xdiv2f(cfa[indx1 - 1] + cfa[indx1 + 1]); sumv += cfa[indx1] - xdiv2f(cfa[indx1 - v1] + cfa[indx1 + v1]); sumsqh += xdiv2f(SQR(cfa[indx1] - cfa[indx1 - 1]) + SQR(cfa[indx1] - cfa[indx1 + 1])); sumsqv += xdiv2f(SQR(cfa[indx1] - cfa[indx1 - v1]) + SQR(cfa[indx1] - cfa[indx1 + v1])); areawt += 1; } } //horizontal and vertical color differences, and adaptive weight hcdvar = epssq + fabsf(areawt * sumsqh - sumh * sumh); vcdvar = epssq + fabsf(areawt * sumsqv - sumv * sumv); hvwt[indx >> 1] = hcdvar / (vcdvar + hcdvar); // end of area interpolation // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //populate G at R/B sites for (rr = 8; rr < rr1 - 8; rr++) for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 8; cc += 2, indx += 2) { //first ask if one gets more directional discrimination from nearby B/R sites hvwtalt = xdivf(hvwt[(indx - m1) >> 1] + hvwt[(indx + p1) >> 1] + hvwt[(indx - p1) >> 1] + hvwt[(indx + m1) >> 1], 2); // hvwtalt = 0.25*(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1]); // vo=fabsf(0.5-hvwt[indx>>1]); // ve=fabsf(0.5-hvwtalt); if (fabsf(0.5 - hvwt[indx >> 1]) < fabsf(0.5 - hvwtalt)) { hvwt[indx >> 1] = hvwtalt; //a better result was obtained from the neighbors } // if (vo>1]=hvwtalt;}//a 
better result was obtained from the neighbors Dgrb[0][indx >> 1] = (hcd[indx] * (1.0f - hvwt[indx >> 1]) + vcd[indx] * hvwt[indx >> 1]); //evaluate color differences //if (hvwt[indx]<0.5) Dgrb[indx][0]=hcd[indx]; //if (hvwt[indx]>0.5) Dgrb[indx][0]=vcd[indx]; rgbgreen[indx] = cfa[indx] + Dgrb[0][indx >> 1]; //evaluate G (finally!) //local curvature in G (preparation for nyquist refinement step) if (nyquist[indx >> 1]) { Dgrb2[indx >> 1].h = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx - 1] + rgbgreen[indx + 1])); Dgrb2[indx >> 1].v = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx - v1] + rgbgreen[indx + v1])); } else { Dgrb2[indx >> 1].h = Dgrb2[indx >> 1].v = 0; } } //end of standard interpolation // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // refine Nyquist areas using G curvatures for (rr = 8; rr < rr1 - 8; rr++) for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc; cc < cc1 - 8; cc += 2, indx += 2) { if (nyquist[indx >> 1]) { //local averages (over Nyquist pixels only) of G curvature squared gvarh = epssq + (gquinc[0] * Dgrb2[indx >> 1].h + gquinc[1] * (Dgrb2[(indx - m1) >> 1].h + Dgrb2[(indx + p1) >> 1].h + Dgrb2[(indx - p1) >> 1].h + Dgrb2[(indx + m1) >> 1].h) + gquinc[2] * (Dgrb2[(indx - v2) >> 1].h + Dgrb2[(indx - 2) >> 1].h + Dgrb2[(indx + 2) >> 1].h + Dgrb2[(indx + v2) >> 1].h) + gquinc[3] * (Dgrb2[(indx - m2) >> 1].h + Dgrb2[(indx + p2) >> 1].h + Dgrb2[(indx - p2) >> 1].h + Dgrb2[(indx + m2) >> 1].h)); gvarv = epssq + (gquinc[0] * Dgrb2[indx >> 1].v + gquinc[1] * (Dgrb2[(indx - m1) >> 1].v + Dgrb2[(indx + p1) >> 1].v + Dgrb2[(indx - p1) >> 1].v + Dgrb2[(indx + m1) >> 1].v) + gquinc[2] * (Dgrb2[(indx - v2) >> 1].v + Dgrb2[(indx - 2) >> 1].v + Dgrb2[(indx + 2) >> 1].v + Dgrb2[(indx + v2) >> 1].v) + gquinc[3] * (Dgrb2[(indx - m2) >> 1].v + Dgrb2[(indx + p2) >> 1].v + Dgrb2[(indx - p2) >> 1].v + 
Dgrb2[(indx + m2) >> 1].v)); //use the results as weights for refined G interpolation Dgrb[0][indx >> 1] = (hcd[indx] * gvarv + vcd[indx] * gvarh) / (gvarv + gvarh); rgbgreen[indx] = cfa[indx] + Dgrb[0][indx >> 1]; } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // diagonal interpolation correction #ifdef __SSE2__ __m128 rbsev, rbnwv, rbnev, rbswv, cfav, rbmv, rbpv, temp1v, wtv; __m128 wtsev, wtnwv, wtnev, wtswv, rbvarmv; __m128 gausseven0v = _mm_set1_ps(gausseven[0]); __m128 gausseven1v = _mm_set1_ps(gausseven[1]); __m128 twov = _mm_set1_ps(2.0f); #endif for (rr = 8; rr < rr1 - 8; rr++) { #ifdef __SSE2__ for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc, indx1 = indx >> 1; cc < cc1 - 8; cc += 8, indx += 8, indx1 += 4) { //diagonal color ratios cfav = LC2VFU(cfa[indx]); temp1v = LC2VFU(cfa[indx + m1]); temp2v = LC2VFU(cfa[indx + m2]); rbsev = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbsev = vself(vmaskf_lt(vabsf(onev - rbsev), arthreshv), cfav * rbsev, temp1v + zd5v * (cfav - temp2v)); temp1v = LC2VFU(cfa[indx - m1]); temp2v = LC2VFU(cfa[indx - m2]); rbnwv = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbnwv = vself(vmaskf_lt(vabsf(onev - rbnwv), arthreshv), cfav * rbnwv, temp1v + zd5v * (cfav - temp2v)); temp1v = epsv + LVFU(delm[indx1]); wtsev = temp1v + LVFU(delm[(indx + m1) >> 1]) + LVFU(delm[(indx + m2) >> 1]); //same as for wtu,wtd,wtl,wtr wtnwv = temp1v + LVFU(delm[(indx - m1) >> 1]) + LVFU(delm[(indx - m2) >> 1]); rbmv = (wtsev * rbnwv + wtnwv * rbsev) / (wtsev + wtnwv); temp1v = ULIMV(rbmv , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])); wtv = twov * (cfav - rbmv) / (epsv + rbmv + cfav); temp2v = wtv * rbmv + (onev - wtv) * temp1v; temp2v = vself(vmaskf_lt(rbmv + rbmv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbmv, cfav), temp2v, rbmv); _mm_storeu_ps(&rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])), temp2v )); temp1v = LC2VFU(cfa[indx + p1]); temp2v = 
LC2VFU(cfa[indx + p2]); rbnev = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbnev = vself(vmaskf_lt(vabsf(onev - rbnev), arthreshv), cfav * rbnev, temp1v + zd5v * (cfav - temp2v)); temp1v = LC2VFU(cfa[indx - p1]); temp2v = LC2VFU(cfa[indx - p2]); rbswv = (temp1v + temp1v) / (epsv + cfav + temp2v ); rbswv = vself(vmaskf_lt(vabsf(onev - rbswv), arthreshv), cfav * rbswv, temp1v + zd5v * (cfav - temp2v)); temp1v = epsv + LVFU(delp[indx1]); wtnev = temp1v + LVFU(delp[(indx + p1) >> 1]) + LVFU(delp[(indx + p2) >> 1]); wtswv = temp1v + LVFU(delp[(indx - p1) >> 1]) + LVFU(delp[(indx - p2) >> 1]); rbpv = (wtnev * rbswv + wtswv * rbnev) / (wtnev + wtswv); temp1v = ULIMV(rbpv , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])); wtv = twov * (cfav - rbpv) / (epsv + rbpv + cfav); temp2v = wtv * rbpv + (onev - wtv) * temp1v; temp2v = vself(vmaskf_lt(rbpv + rbpv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbpv, cfav), temp2v, rbpv); _mm_storeu_ps(&rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])), temp2v )); rbvarmv = epssqv + (gausseven0v * (LVFU(Dgrbsq1m[(indx - v1) >> 1]) + LVFU(Dgrbsq1m[(indx - 1) >> 1]) + LVFU(Dgrbsq1m[(indx + 1) >> 1]) + LVFU(Dgrbsq1m[(indx + v1) >> 1])) + gausseven1v * (LVFU(Dgrbsq1m[(indx - v2 - 1) >> 1]) + LVFU(Dgrbsq1m[(indx - v2 + 1) >> 1]) + LVFU(Dgrbsq1m[(indx - 2 - v1) >> 1]) + LVFU(Dgrbsq1m[(indx + 2 - v1) >> 1]) + LVFU(Dgrbsq1m[(indx - 2 + v1) >> 1]) + LVFU(Dgrbsq1m[(indx + 2 + v1) >> 1]) + LVFU(Dgrbsq1m[(indx + v2 - 1) >> 1]) + LVFU(Dgrbsq1m[(indx + v2 + 1) >> 1]))); _mm_storeu_ps(&pmwt[indx1] , rbvarmv / ((epssqv + (gausseven0v * (LVFU(Dgrbsq1p[(indx - v1) >> 1]) + LVFU(Dgrbsq1p[(indx - 1) >> 1]) + LVFU(Dgrbsq1p[(indx + 1) >> 1]) + LVFU(Dgrbsq1p[(indx + v1) >> 1])) + gausseven1v * (LVFU(Dgrbsq1p[(indx - v2 - 1) >> 1]) + LVFU(Dgrbsq1p[(indx - v2 + 1) >> 1]) + LVFU(Dgrbsq1p[(indx - 2 - v1) >> 1]) + LVFU(Dgrbsq1p[(indx + 2 - v1) >> 1]) + LVFU(Dgrbsq1p[(indx - 2 + v1) >> 1]) + 
LVFU(Dgrbsq1p[(indx + 2 + v1) >> 1]) + LVFU(Dgrbsq1p[(indx + v2 - 1) >> 1]) + LVFU(Dgrbsq1p[(indx + v2 + 1) >> 1])))) + rbvarmv)); } #else for (cc = 8 + (FC(rr, 2) & 1), indx = rr * TS + cc, indx1 = indx >> 1; cc < cc1 - 8; cc += 2, indx += 2, indx1++) { //diagonal color ratios crse = xmul2f(cfa[indx + m1]) / (eps + cfa[indx] + (cfa[indx + m2])); crnw = xmul2f(cfa[indx - m1]) / (eps + cfa[indx] + (cfa[indx - m2])); crne = xmul2f(cfa[indx + p1]) / (eps + cfa[indx] + (cfa[indx + p2])); crsw = xmul2f(cfa[indx - p1]) / (eps + cfa[indx] + (cfa[indx - p2])); //assign B/R at R/B sites if (fabsf(1.0f - crse) < arthresh) { rbse = cfa[indx] * crse; //use this if more precise diag interp is necessary } else { rbse = (cfa[indx + m1]) + xdiv2f(cfa[indx] - cfa[indx + m2]); } if (fabsf(1.0f - crnw) < arthresh) { rbnw = cfa[indx] * crnw; } else { rbnw = (cfa[indx - m1]) + xdiv2f(cfa[indx] - cfa[indx - m2]); } if (fabsf(1.0f - crne) < arthresh) { rbne = cfa[indx] * crne; } else { rbne = (cfa[indx + p1]) + xdiv2f(cfa[indx] - cfa[indx + p2]); } if (fabsf(1.0f - crsw) < arthresh) { rbsw = cfa[indx] * crsw; } else { rbsw = (cfa[indx - p1]) + xdiv2f(cfa[indx] - cfa[indx - p2]); } wtse = eps + delm[indx1] + delm[(indx + m1) >> 1] + delm[(indx + m2) >> 1]; //same as for wtu,wtd,wtl,wtr wtnw = eps + delm[indx1] + delm[(indx - m1) >> 1] + delm[(indx - m2) >> 1]; wtne = eps + delp[indx1] + delp[(indx + p1) >> 1] + delp[(indx + p2) >> 1]; wtsw = eps + delp[indx1] + delp[(indx - p1) >> 1] + delp[(indx - p2) >> 1]; rbm[indx1] = (wtse * rbnw + wtnw * rbse) / (wtse + wtnw); rbp[indx1] = (wtne * rbsw + wtsw * rbne) / (wtne + wtsw); /* rbvarp = epssq + (gausseven[0]*(Dgrbsq1[indx-v1].p+Dgrbsq1[indx-1].p+Dgrbsq1[indx+1].p+Dgrbsq1[indx+v1].p) + gausseven[1]*(Dgrbsq1[indx-v2-1].p+Dgrbsq1[indx-v2+1].p+Dgrbsq1[indx-2-v1].p+Dgrbsq1[indx+2-v1].p+ Dgrbsq1[indx-2+v1].p+Dgrbsq1[indx+2+v1].p+Dgrbsq1[indx+v2-1].p+Dgrbsq1[indx+v2+1].p)); */ rbvarm = epssq + (gausseven[0] * (Dgrbsq1m[(indx - v1) >> 1] + 
Dgrbsq1m[(indx - 1) >> 1] + Dgrbsq1m[(indx + 1) >> 1] + Dgrbsq1m[(indx + v1) >> 1]) + gausseven[1] * (Dgrbsq1m[(indx - v2 - 1) >> 1] + Dgrbsq1m[(indx - v2 + 1) >> 1] + Dgrbsq1m[(indx - 2 - v1) >> 1] + Dgrbsq1m[(indx + 2 - v1) >> 1] + Dgrbsq1m[(indx - 2 + v1) >> 1] + Dgrbsq1m[(indx + 2 + v1) >> 1] + Dgrbsq1m[(indx + v2 - 1) >> 1] + Dgrbsq1m[(indx + v2 + 1) >> 1])); pmwt[indx1] = rbvarm / ((epssq + (gausseven[0] * (Dgrbsq1p[(indx - v1) >> 1] + Dgrbsq1p[(indx - 1) >> 1] + Dgrbsq1p[(indx + 1) >> 1] + Dgrbsq1p[(indx + v1) >> 1]) + gausseven[1] * (Dgrbsq1p[(indx - v2 - 1) >> 1] + Dgrbsq1p[(indx - v2 + 1) >> 1] + Dgrbsq1p[(indx - 2 - v1) >> 1] + Dgrbsq1p[(indx + 2 - v1) >> 1] + Dgrbsq1p[(indx - 2 + v1) >> 1] + Dgrbsq1p[(indx + 2 + v1) >> 1] + Dgrbsq1p[(indx + v2 - 1) >> 1] + Dgrbsq1p[(indx + v2 + 1) >> 1]))) + rbvarm); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //bound the interpolation in regions of high saturation if (rbp[indx1] < cfa[indx]) { if (xmul2f(rbp[indx1]) < cfa[indx]) { rbp[indx1] = ULIM(rbp[indx1] , cfa[indx - p1], cfa[indx + p1]); } else { pwt = xmul2f(cfa[indx] - rbp[indx1]) / (eps + rbp[indx1] + cfa[indx]); rbp[indx1] = pwt * rbp[indx1] + (1.0f - pwt) * ULIM(rbp[indx1], cfa[indx - p1], cfa[indx + p1]); } } if (rbm[indx1] < cfa[indx]) { if (xmul2f(rbm[indx1]) < cfa[indx]) { rbm[indx1] = ULIM(rbm[indx1] , cfa[indx - m1], cfa[indx + m1]); } else { mwt = xmul2f(cfa[indx] - rbm[indx1]) / (eps + rbm[indx1] + cfa[indx]); rbm[indx1] = mwt * rbm[indx1] + (1.0f - mwt) * ULIM(rbm[indx1], cfa[indx - m1], cfa[indx + m1]); } } if (rbp[indx1] > clip_pt) { rbp[indx1] = ULIM(rbp[indx1], cfa[indx - p1], cfa[indx + p1]); //for RT implementation } if (rbm[indx1] > clip_pt) { rbm[indx1] = ULIM(rbm[indx1], cfa[indx - m1], cfa[indx + m1]); } //c=2-FC(rr,cc);//for dcraw implementation //if (rbp[indx] > pre_mul[c]) rbp[indx]=ULIM(rbp[indx],cfa[indx-p1],cfa[indx+p1]); //if (rbm[indx] > pre_mul[c]) rbm[indx]=ULIM(rbm[indx],cfa[indx-m1],cfa[indx+m1]); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
//rbint[indx] = 0.5*(cfa[indx] + (rbp*rbvarm+rbm*rbvarp)/(rbvarp+rbvarm));//this is R+B, interpolated } #endif } #ifdef __SSE2__ __m128 pmwtaltv; __m128 zd25v = _mm_set1_ps(0.25f); #endif for (rr = 10; rr < rr1 - 10; rr++) #ifdef __SSE2__ for (cc = 10 + (FC(rr, 2) & 1), indx = rr * TS + cc, indx1 = indx >> 1; cc < cc1 - 10; cc += 8, indx += 8, indx1 += 4) { //first ask if one gets more directional discrimination from nearby B/R sites pmwtaltv = zd25v * (LVFU(pmwt[(indx - m1) >> 1]) + LVFU(pmwt[(indx + p1) >> 1]) + LVFU(pmwt[(indx - p1) >> 1]) + LVFU(pmwt[(indx + m1) >> 1])); tempv = LVFU(pmwt[indx1]); tempv = vself(vmaskf_lt(vabsf(zd5v - tempv), vabsf(zd5v - pmwtaltv)), pmwtaltv, tempv); _mm_storeu_ps( &pmwt[indx1], tempv); _mm_storeu_ps( &rbint[indx1], zd5v * (LC2VFU(cfa[indx]) + LVFU(rbm[indx1]) * (onev - tempv) + LVFU(rbp[indx1]) * tempv)); } #else for (cc = 10 + (FC(rr, 2) & 1), indx = rr * TS + cc, indx1 = indx >> 1; cc < cc1 - 10; cc += 2, indx += 2, indx1++) { //first ask if one gets more directional discrimination from nearby B/R sites pmwtalt = xdivf(pmwt[(indx - m1) >> 1] + pmwt[(indx + p1) >> 1] + pmwt[(indx - p1) >> 1] + pmwt[(indx + m1) >> 1], 2); if (fabsf(0.5 - pmwt[indx1]) < fabsf(0.5 - pmwtalt)) { pmwt[indx1] = pmwtalt; //a better result was obtained from the neighbors } rbint[indx1] = xdiv2f(cfa[indx] + rbm[indx1] * (1.0f - pmwt[indx1]) + rbp[indx1] * pmwt[indx1]); //this is R+B, interpolated } #endif for (rr = 12; rr < rr1 - 12; rr++) for (cc = 12 + (FC(rr, 2) & 1), indx = rr * TS + cc, indx1 = indx >> 1; cc < cc1 - 12; cc += 2, indx += 2, indx1++) { if (fabsf(0.5 - pmwt[indx >> 1]) < fabsf(0.5 - hvwt[indx >> 1]) ) { continue; } //now interpolate G vertically/horizontally using R+B values //unfortunately, since G interpolation cannot be done diagonally this may lead to color shifts //color ratios for G interpolation cru = cfa[indx - v1] * 2.0 / (eps + rbint[indx1] + rbint[(indx1 - v1)]); crd = cfa[indx + v1] * 2.0 / (eps + rbint[indx1] + 
rbint[(indx1 + v1)]); crl = cfa[indx - 1] * 2.0 / (eps + rbint[indx1] + rbint[(indx1 - 1)]); crr = cfa[indx + 1] * 2.0 / (eps + rbint[indx1] + rbint[(indx1 + 1)]); //interpolated G via adaptive ratios or Hamilton-Adams in each cardinal direction if (fabsf(1.0f - cru) < arthresh) { gu = rbint[indx1] * cru; } else { gu = cfa[indx - v1] + xdiv2f(rbint[indx1] - rbint[(indx1 - v1)]); } if (fabsf(1.0f - crd) < arthresh) { gd = rbint[indx1] * crd; } else { gd = cfa[indx + v1] + xdiv2f(rbint[indx1] - rbint[(indx1 + v1)]); } if (fabsf(1.0f - crl) < arthresh) { gl = rbint[indx1] * crl; } else { gl = cfa[indx - 1] + xdiv2f(rbint[indx1] - rbint[(indx1 - 1)]); } if (fabsf(1.0f - crr) < arthresh) { gr = rbint[indx1] * crr; } else { gr = cfa[indx + 1] + xdiv2f(rbint[indx1] - rbint[(indx1 + 1)]); } //gu=rbint[indx]*cru; //gd=rbint[indx]*crd; //gl=rbint[indx]*crl; //gr=rbint[indx]*crr; //interpolated G via adaptive weights of cardinal evaluations Gintv = (dirwts0[indx - v1] * gd + dirwts0[indx + v1] * gu) / (dirwts0[indx + v1] + dirwts0[indx - v1]); Ginth = (dirwts1[indx - 1] * gr + dirwts1[indx + 1] * gl) / (dirwts1[indx - 1] + dirwts1[indx + 1]); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //bound the interpolation in regions of high saturation if (Gintv < rbint[indx1]) { if (2 * Gintv < rbint[indx1]) { Gintv = ULIM(Gintv , cfa[indx - v1], cfa[indx + v1]); } else { vwt = 2.0 * (rbint[indx1] - Gintv) / (eps + Gintv + rbint[indx1]); Gintv = vwt * Gintv + (1.0f - vwt) * ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]); } } if (Ginth < rbint[indx1]) { if (2 * Ginth < rbint[indx1]) { Ginth = ULIM(Ginth , cfa[indx - 1], cfa[indx + 1]); } else { hwt = 2.0 * (rbint[indx1] - Ginth) / (eps + Ginth + rbint[indx1]); Ginth = hwt * Ginth + (1.0f - hwt) * ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]); } } if (Ginth > clip_pt) { Ginth = ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]); //for RT implementation } if (Gintv > clip_pt) { Gintv = ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]); } //c=FC(rr,cc);//for dcraw 
implementation //if (Ginth > pre_mul[c]) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]); //if (Gintv > pre_mul[c]) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]); // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% rgbgreen[indx] = Ginth * (1.0f - hvwt[indx1]) + Gintv * hvwt[indx1]; //rgb[indx][1] = 0.5*(rgb[indx][1]+0.25*(rgb[indx-v1][1]+rgb[indx+v1][1]+rgb[indx-1][1]+rgb[indx+1][1])); Dgrb[0][indx >> 1] = rgbgreen[indx] - cfa[indx]; //rgb[indx][2-FC(rr,cc)]=2*rbint[indx]-cfa[indx]; } //end of diagonal interpolation correction // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //fancy chrominance interpolation //(ey,ex) is location of R site for (rr = 13 - ey; rr < rr1 - 12; rr += 2) for (cc = 13 - ex, indx1 = (rr * TS + cc) >> 1; cc < cc1 - 12; cc += 2, indx1++) { //B coset Dgrb[1][indx1] = Dgrb[0][indx1]; //split out G-B from G-R Dgrb[0][indx1] = 0; } #ifdef __SSE2__ // __m128 wtnwv,wtnev,wtswv,wtsev; __m128 oned325v = _mm_set1_ps( 1.325f ); __m128 zd175v = _mm_set1_ps( 0.175f ); __m128 zd075v = _mm_set1_ps( 0.075f ); #endif for (rr = 14; rr < rr1 - 14; rr++) #ifdef __SSE2__ for (cc = 14 + (FC(rr, 2) & 1), indx = rr * TS + cc, c = 1 - FC(rr, cc) / 2; cc < cc1 - 14; cc += 8, indx += 8) { wtnwv = onev / (epsv + vabsf(LVFU(Dgrb[c][(indx - m1) >> 1]) - LVFU(Dgrb[c][(indx + m1) >> 1])) + vabsf(LVFU(Dgrb[c][(indx - m1) >> 1]) - LVFU(Dgrb[c][(indx - m3) >> 1])) + vabsf(LVFU(Dgrb[c][(indx + m1) >> 1]) - LVFU(Dgrb[c][(indx - m3) >> 1]))); wtnev = onev / (epsv + vabsf(LVFU(Dgrb[c][(indx + p1) >> 1]) - LVFU(Dgrb[c][(indx - p1) >> 1])) + vabsf(LVFU(Dgrb[c][(indx + p1) >> 1]) - LVFU(Dgrb[c][(indx + p3) >> 1])) + vabsf(LVFU(Dgrb[c][(indx - p1) >> 1]) - LVFU(Dgrb[c][(indx + p3) >> 1]))); wtswv = onev / (epsv + vabsf(LVFU(Dgrb[c][(indx - p1) >> 1]) - LVFU(Dgrb[c][(indx + p1) >> 1])) + vabsf(LVFU(Dgrb[c][(indx - p1) >> 1]) - LVFU(Dgrb[c][(indx + m3) >> 1])) + vabsf(LVFU(Dgrb[c][(indx + p1) >> 1]) - LVFU(Dgrb[c][(indx - p3) >> 1]))); wtsev = onev / (epsv + vabsf(LVFU(Dgrb[c][(indx + 
m1) >> 1]) - LVFU(Dgrb[c][(indx - m1) >> 1])) + vabsf(LVFU(Dgrb[c][(indx + m1) >> 1]) - LVFU(Dgrb[c][(indx - p3) >> 1])) + vabsf(LVFU(Dgrb[c][(indx - m1) >> 1]) - LVFU(Dgrb[c][(indx + m3) >> 1]))); //Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse); _mm_storeu_ps(&Dgrb[c][indx >> 1], (wtnwv * (oned325v * LVFU(Dgrb[c][(indx - m1) >> 1]) - zd175v * LVFU(Dgrb[c][(indx - m3) >> 1]) - zd075v * LVFU(Dgrb[c][(indx - m1 - 2) >> 1]) - zd075v * LVFU(Dgrb[c][(indx - m1 - v2) >> 1]) ) + wtnev * (oned325v * LVFU(Dgrb[c][(indx + p1) >> 1]) - zd175v * LVFU(Dgrb[c][(indx + p3) >> 1]) - zd075v * LVFU(Dgrb[c][(indx + p1 + 2) >> 1]) - zd075v * LVFU(Dgrb[c][(indx + p1 + v2) >> 1]) ) + wtswv * (oned325v * LVFU(Dgrb[c][(indx - p1) >> 1]) - zd175v * LVFU(Dgrb[c][(indx - p3) >> 1]) - zd075v * LVFU(Dgrb[c][(indx - p1 - 2) >> 1]) - zd075v * LVFU(Dgrb[c][(indx - p1 - v2) >> 1]) ) + wtsev * (oned325v * LVFU(Dgrb[c][(indx + m1) >> 1]) - zd175v * LVFU(Dgrb[c][(indx + m3) >> 1]) - zd075v * LVFU(Dgrb[c][(indx + m1 + 2) >> 1]) - zd075v * LVFU(Dgrb[c][(indx + m1 + v2) >> 1]) )) / (wtnwv + wtnev + wtswv + wtsev)); } #else for (cc = 14 + (FC(rr, 2) & 1), indx = rr * TS + cc, c = 1 - FC(rr, cc) / 2; cc < cc1 - 14; cc += 2, indx += 2) { wtnw = 1.0f / (eps + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx + m1) >> 1]) + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx - m3) >> 1]) + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - m3) >> 1])); wtne = 1.0f / (eps + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx - p1) >> 1]) + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx + p3) >> 1]) + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + p3) >> 1])); wtsw = 1.0f / (eps + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + p1) >> 1]) + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + m3) >> 1]) + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx - p3) >> 1])); wtse = 1.0f / (eps + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - 
m1) >> 1]) + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - p3) >> 1]) + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx + m3) >> 1])); //Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse); Dgrb[c][indx >> 1] = (wtnw * (1.325f * Dgrb[c][(indx - m1) >> 1] - 0.175f * Dgrb[c][(indx - m3) >> 1] - 0.075f * Dgrb[c][(indx - m1 - 2) >> 1] - 0.075f * Dgrb[c][(indx - m1 - v2) >> 1] ) + wtne * (1.325f * Dgrb[c][(indx + p1) >> 1] - 0.175f * Dgrb[c][(indx + p3) >> 1] - 0.075f * Dgrb[c][(indx + p1 + 2) >> 1] - 0.075f * Dgrb[c][(indx + p1 + v2) >> 1] ) + wtsw * (1.325f * Dgrb[c][(indx - p1) >> 1] - 0.175f * Dgrb[c][(indx - p3) >> 1] - 0.075f * Dgrb[c][(indx - p1 - 2) >> 1] - 0.075f * Dgrb[c][(indx - p1 - v2) >> 1] ) + wtse * (1.325f * Dgrb[c][(indx + m1) >> 1] - 0.175f * Dgrb[c][(indx + m3) >> 1] - 0.075f * Dgrb[c][(indx + m1 + 2) >> 1] - 0.075f * Dgrb[c][(indx + m1 + v2) >> 1] )) / (wtnw + wtne + wtsw + wtse); } #endif float temp; for (rr = 16; rr < rr1 - 16; rr++) { if((FC(rr, 2) & 1) == 1) { for (cc = 16, indx = rr * TS + cc, row = rr + top; cc < cc1 - 16 - (cc1 & 1); cc += 2, indx++) { col = cc + left; temp = 1.0f / ((hvwt[(indx - v1) >> 1]) + (1.0f - hvwt[(indx + 1) >> 1]) + (1.0f - hvwt[(indx - 1) >> 1]) + (hvwt[(indx + v1) >> 1])); red[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1]) * temp); blue[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1]) * temp); indx++; col++; red[row][col] = 65535.0f * (rgbgreen[indx] - Dgrb[0][indx >> 1]); blue[row][col] = 65535.0f * 
(rgbgreen[indx] - Dgrb[1][indx >> 1]); } if(cc1 & 1) { // width of tile is odd col = cc + left; temp = 1.0f / ((hvwt[(indx - v1) >> 1]) + (1.0f - hvwt[(indx + 1) >> 1]) + (1.0f - hvwt[(indx - 1) >> 1]) + (hvwt[(indx + v1) >> 1])); red[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1]) * temp); blue[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1]) * temp); } } else { for (cc = 16, indx = rr * TS + cc, row = rr + top; cc < cc1 - 16 - (cc1 & 1); cc += 2, indx++) { col = cc + left; red[row][col] = 65535.0f * (rgbgreen[indx] - Dgrb[0][indx >> 1]); blue[row][col] = 65535.0f * (rgbgreen[indx] - Dgrb[1][indx >> 1]); indx++; col++; temp = 1.0f / ((hvwt[(indx - v1) >> 1]) + (1.0f - hvwt[(indx + 1) >> 1]) + (1.0f - hvwt[(indx - 1) >> 1]) + (hvwt[(indx + v1) >> 1])); red[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1]) * temp); blue[row][col] = 65535.0f * (rgbgreen[indx] - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1] + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1] + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1] + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1]) * temp); } if(cc1 & 1) { // width of tile is odd col = cc + left; red[row][col] = 65535.0f * (rgbgreen[indx] - Dgrb[0][indx >> 1]); blue[row][col] = 65535.0f * (rgbgreen[indx] - Dgrb[1][indx >> 1]); } } } // 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // copy smoothed results back to image matrix for (rr = 16; rr < rr1 - 16; rr++) { #ifdef __SSE2__ for (row = rr + top, cc = 16; cc < cc1 - 19; cc += 4) { _mm_storeu_ps(&green[row][cc + left], LVF(rgbgreen[rr * TS + cc]) * c65535v); } #else for (row = rr + top, cc = 16; cc < cc1 - 16; cc++) { col = cc + left; indx = rr * TS + cc; green[row][col] = ((65535.0f * rgbgreen[indx])); //for dcraw implementation //for (c=0; c<3; c++){ // image[indx][c] = CLIP((int)(65535.0f*rgb[rr*TS+cc][c] + 0.5f)); //} } #endif } //end of main loop if(plistener) { progresscounter++; if(progresscounter % 4 == 0) { #pragma omp critical { progress += (double)4 * ((TS - 32) * (TS - 32)) / (height * width); if (progress > 1.0) { progress = 1.0; } plistener->setProgress(progress); } } } } // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // clean up free(buffer); } if(plistener) { plistener->setProgress(1.0); } // done #undef TS } }