Files
rawTherapee/rtengine/amaze_demosaic_RT.cc
Hombre 8b2eac9a3d Pipette and "On Preview Widgets" branch. See issue 227
The pipette part is already working quite nice but need to be finished. The widgets part needs more work...
2014-01-21 23:37:36 +01:00

1420 lines
61 KiB
C++

////////////////////////////////////////////////////////////////
//
// AMaZE demosaic algorithm
// (Aliasing Minimization and Zipper Elimination)
//
// copyright (c) 2008-2010 Emil Martinec <ejmartin@uchicago.edu>
//
// incorporating ideas of Luis Sanz Rodrigues and Paul Lee
//
// code dated: May 27, 2010
//
// amaze_interpolate_RT.cc is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////
#include "rtengine.h"
#include "rawimagesource.h"
#include "rt_math.h"
#include "../rtgui/multilangmgr.h"
#include "procparams.h"
#include "sleef.c"
#include "opthelper.h"
namespace rtengine {
SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) {
#define HCLIP(x) x //is this still necessary???
//min(clip_pt,x)
int width=winw, height=winh;
const float clip_pt = 1/initialGain;
const float clip_pt8 = 0.8f/initialGain;
#define TS 160 // Tile size; the image is processed in square tiles to lower memory requirements and facilitate multi-threading
#define TSH 80 // half of Tile size
// local variables
//offset of R pixel within a Bayer quartet
int ex, ey;
//shifts of pointer value to access pixels in vertical and diagonal directions
static const int v1=TS, v2=2*TS, v3=3*TS, p1=-TS+1, p2=-2*TS+2, p3=-3*TS+3, m1=TS+1, m2=2*TS+2, m3=3*TS+3;
//tolerance to avoid dividing by zero
static const float eps=1e-5, epssq=1e-10; //tolerance to avoid dividing by zero
//adaptive ratios threshold
static const float arthresh=0.75;
//nyquist texture test threshold
static const float nyqthresh=0.5;
//gaussian on 5x5 quincunx, sigma=1.2
static const float gaussodd[4] = {0.14659727707323927f, 0.103592713382435f, 0.0732036125103057f, 0.0365543548389495f};
//gaussian on 5x5, sigma=1.2
static const float gaussgrad[6] = {0.07384411893421103f, 0.06207511968171489f, 0.0521818194747806f,
0.03687419286733595f, 0.03099732204057846f, 0.018413194161458882f};
//gaussian on 5x5 alt quincunx, sigma=1.5
static const float gausseven[2] = {0.13719494435797422f, 0.05640252782101291f};
//guassian on quincunx grid
static const float gquinc[4] = {0.169917f, 0.108947f, 0.069855f, 0.0287182f};
volatile double progress = 0.0;
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Issue 1676
// Moved from inside the parallel section
if (plistener) {
plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::methodstring[RAWParams::amaze]));
plistener->setProgress (0.0);
}
struct s_hv {
float h;
float v;
};
#pragma omp parallel
{
int progresscounter=0;
//position of top/left corner of the tile
int top, left;
// beginning of storage block for tile
char *buffer;
// green values
float (*rgbgreen);
// sum of square of horizontal gradient and square of vertical gradient
float (*delhvsqsum);
// gradient based directional weights for interpolation
float (*dirwts0);
float (*dirwts1);
// vertically interpolated color differences G-R, G-B
float (*vcd);
// horizontally interpolated color differences
float (*hcd);
// alternative vertical interpolation
float (*vcdalt);
// alternative horizontal interpolation
float (*hcdalt);
// square of average color difference
float (*cddiffsq);
// weight to give horizontal vs vertical interpolation
float (*hvwt);
// final interpolated color difference
float (*Dgrb)[TS*TSH];
// float (*Dgrb)[2];
// gradient in plus (NE/SW) direction
float (*delp);
// gradient in minus (NW/SE) direction
float (*delm);
// diagonal interpolation of R+B
float (*rbint);
// horizontal and vertical curvature of interpolated G (used to refine interpolation in Nyquist texture regions)
s_hv (*Dgrb2);
// difference between up/down interpolations of G
float (*dgintv);
// difference between left/right interpolations of G
float (*dginth);
// diagonal (plus) color difference R-B or G1-G2
// float (*Dgrbp1);
// diagonal (minus) color difference R-B or G1-G2
// float (*Dgrbm1);
float (*Dgrbsq1m);
float (*Dgrbsq1p);
// s_mp (*Dgrbsq1);
// square of diagonal color difference
// float (*Dgrbpsq1);
// square of diagonal color difference
// float (*Dgrbmsq1);
// tile raw data
float (*cfa);
// relative weight for combining plus and minus diagonal interpolations
float (*pmwt);
// interpolated color difference R-B in minus and plus direction
float (*rbm);
float (*rbp);
// nyquist texture flag 1=nyquist, 0=not nyquist
char (*nyquist);
#define CLF 1
// assign working space
buffer = (char *) calloc(22*sizeof(float)*TS*TS + sizeof(char)*TS*TSH+23*CLF*64 + 63, 1);
char *data;
data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
//merror(buffer,"amaze_interpolate()");
rgbgreen = (float (*)) data; //pointers to array
delhvsqsum = (float (*)) ((char*)rgbgreen + sizeof(float)*TS*TS + CLF*64);
dirwts0 = (float (*)) ((char*)delhvsqsum + sizeof(float)*TS*TS + CLF*64);
dirwts1 = (float (*)) ((char*)dirwts0 + sizeof(float)*TS*TS + CLF*64);
vcd = (float (*)) ((char*)dirwts1 + sizeof(float)*TS*TS + CLF*64);
hcd = (float (*)) ((char*)vcd + sizeof(float)*TS*TS + CLF*64);
vcdalt = (float (*)) ((char*)hcd + sizeof(float)*TS*TS + CLF*64);
hcdalt = (float (*)) ((char*)vcdalt + sizeof(float)*TS*TS + CLF*64);
cddiffsq = (float (*)) ((char*)hcdalt + sizeof(float)*TS*TS + CLF*64);
hvwt = (float (*)) ((char*)cddiffsq + sizeof(float)*TS*TS + CLF*64);
Dgrb = (float (*)[TS*TSH]) ((char*)hvwt + sizeof(float)*TS*TSH + CLF*64);
delp = (float (*)) ((char*)Dgrb + sizeof(float)*TS*TS + CLF*64);
delm = (float (*)) ((char*)delp + sizeof(float)*TS*TSH + CLF*64);
rbint = (float (*)) ((char*)delm + sizeof(float)*TS*TSH + CLF*64);
Dgrb2 = (s_hv (*)) ((char*)rbint + sizeof(float)*TS*TSH + CLF*64);
dgintv = (float (*)) ((char*)Dgrb2 + sizeof(float)*TS*TS + CLF*64);
dginth = (float (*)) ((char*)dgintv + sizeof(float)*TS*TS + CLF*64);
Dgrbsq1m = (float (*)) ((char*)dginth + sizeof(float)*TS*TS + CLF*64);
Dgrbsq1p = (float (*)) ((char*)Dgrbsq1m + sizeof(float)*TS*TSH + CLF*64);
cfa = (float (*)) ((char*)Dgrbsq1p + sizeof(float)*TS*TSH + CLF*64);
pmwt = (float (*)) ((char*)cfa + sizeof(float)*TS*TS + CLF*64);
rbm = (float (*)) ((char*)pmwt + sizeof(float)*TS*TSH + CLF*64);
rbp = (float (*)) ((char*)rbm + sizeof(float)*TS*TSH + CLF*64);
nyquist = (char (*)) ((char*)rbp + sizeof(float)*TS*TSH + CLF*64);
#undef CLF
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//determine GRBG coset; (ey,ex) is the offset of the R subarray
if (FC(0,0)==1) {//first pixel is G
if (FC(0,1)==0) {ey=0; ex=1;} else {ey=1; ex=0;}
} else {//first pixel is R or B
if (FC(0,0)==0) {ey=0; ex=0;} else {ey=1; ex=1;}
}
// Main algorithm: Tile loop
//#pragma omp parallel for shared(rawData,height,width,red,green,blue) private(top,left) schedule(dynamic)
//code is openmp ready; just have to pull local tile variable declarations inside the tile loop
// Issue 1676
// use collapse(2) to collapse the 2 loops to one large loop, so there is better scaling
#pragma omp for schedule(dynamic) collapse(2) nowait
for (top=winy-16; top < winy+height; top += TS-32)
for (left=winx-16; left < winx+width; left += TS-32) {
memset(nyquist, 0, sizeof(char)*TS*TSH);
memset(rbint, 0, sizeof(float)*TS*TSH);
//location of tile bottom edge
int bottom = min(top+TS,winy+height+16);
//location of tile right edge
int right = min(left+TS, winx+width+16);
//tile width (=TS except for right edge of image)
int rr1 = bottom - top;
//tile height (=TS except for bottom edge of image)
int cc1 = right - left;
//tile vars
//counters for pixel location in the image
int row, col;
//min and max row/column in the tile
int rrmin, rrmax, ccmin, ccmax;
//counters for pixel location within the tile
int rr, cc;
//color index 0=R, 1=G, 2=B
int c;
//pointer counters within the tile
int indx, indx1;
//dummy indices
int i, j;
//color ratios in up/down/left/right directions
float cru, crd, crl, crr;
//adaptive weights for vertical/horizontal/plus/minus directions
float vwt, hwt, pwt, mwt;
//vertical and horizontal G interpolations
float Gintv, Ginth;
//G interpolated in vert/hor directions using adaptive ratios
float guar, gdar, glar, grar;
//G interpolated in vert/hor directions using Hamilton-Adams method
float guha, gdha, glha, grha;
//interpolated G from fusing left/right or up/down
float Ginthar, Ginthha, Gintvar, Gintvha;
//color difference (G-R or G-B) variance in up/down/left/right directions
float Dgrbvvaru, Dgrbvvard, Dgrbhvarl, Dgrbhvarr;
float uave, dave, lave, rave;
//color difference variances in vertical and horizontal directions
float vcdvar, hcdvar, vcdvar1, hcdvar1, hcdaltvar, vcdaltvar;
//adaptive interpolation weight using variance of color differences
float varwt; // 639 - 644
//adaptive interpolation weight using difference of left-right and up-down G interpolations
float diffwt; // 640 - 644
//alternative adaptive weight for combining horizontal/vertical interpolations
float hvwtalt; // 745 - 748
//interpolation of G in four directions
float gu, gd, gl, gr;
//variance of G in vertical/horizontal directions
float gvarh, gvarv;
//Nyquist texture test
float nyqtest; // 658 - 681
//accumulators for Nyquist texture interpolation
float sumh, sumv, sumsqh, sumsqv, areawt;
//color ratios in diagonal directions
float crse, crnw, crne, crsw;
//color differences in diagonal directions
float rbse, rbnw, rbne, rbsw;
//adaptive weights for combining diagonal interpolations
float wtse, wtnw, wtsw, wtne;
//alternate weight for combining diagonal interpolations
float pmwtalt; // 885 - 888
//variance of R-B in plus/minus directions
float rbvarm; // 843 - 848
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// rgb from input CFA data
// rgb values should be floating point number between 0 and 1
// after white balance multipliers are applied
// a 16 pixel border is added to each side of the image
// bookkeeping for borders
if (top<winy) {rrmin=16;} else {rrmin=0;}
if (left<winx) {ccmin=16;} else {ccmin=0;}
if (bottom>(winy+height)) {rrmax=winy+height-top;} else {rrmax=rr1;}
if (right>(winx+width)) {ccmax=winx+width-left;} else {ccmax=cc1;}
#ifdef __SSE2__
const __m128 c65535v = _mm_set1_ps( 65535.0f );
__m128 tempv;
for (rr=rrmin; rr < rrmax; rr++){
for (row=rr+top, cc=ccmin; cc < ccmax-3; cc+=4) {
indx1=rr*TS+cc;
tempv = LVFU(rawData[row][cc+left]) / c65535v;
_mm_store_ps( &cfa[indx1], tempv );
_mm_store_ps( &rgbgreen[indx1], tempv );
}
for (; cc < ccmax; cc++) {
indx1=rr*TS+cc;
cfa[indx1] = (rawData[row][cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[indx1] = cfa[indx1];
}
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//fill borders
if (rrmin>0) {
for (rr=0; rr<16; rr++)
for (cc=ccmin,row = 32-rr+top; cc<ccmax; cc++) {
cfa[rr*TS+cc] = (rawData[row][cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
}
}
if (rrmax<rr1) {
for (rr=0; rr<16; rr++)
for (cc=ccmin; cc<ccmax; cc+=4) {
indx1 = (rrmax+rr)*TS+cc;
tempv = LVFU(rawData[(winy+height-rr-2)][left+cc]) / c65535v;
_mm_store_ps( &cfa[indx1], tempv );
_mm_store_ps( &rgbgreen[indx1], tempv );
}
}
if (ccmin>0) {
for (rr=rrmin; rr<rrmax; rr++)
for (cc=0,row = rr + top; cc<16; cc++) {
cfa[rr*TS+cc] = (rawData[row][32-cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
}
}
if (ccmax<cc1) {
for (rr=rrmin; rr<rrmax; rr++)
for (cc=0; cc<16; cc++) {
cfa[rr*TS+ccmax+cc] = (rawData[(top+rr)][(winx+width-cc-2)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+ccmax+cc] = cfa[rr*TS+ccmax+cc];
}
}
//also, fill the image corners
if (rrmin>0 && ccmin>0) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc+=4) {
indx1 = (rr)*TS+cc;
tempv = LVFU(rawData[winy+32-rr][winx+32-cc]) / c65535v;
_mm_store_ps( &cfa[indx1], tempv );
_mm_store_ps( &rgbgreen[indx1], tempv );
}
}
if (rrmax<rr1 && ccmax<cc1) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc+=4) {
indx1 = (rrmax+rr)*TS+ccmax+cc;
tempv = LVFU(rawData[(winy+height-rr-2)][(winx+width-cc-2)]) / c65535v;
_mm_storeu_ps( &cfa[indx1], tempv );
_mm_storeu_ps( &rgbgreen[indx1], tempv );
}
}
if (rrmin>0 && ccmax<cc1) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rr)*TS+ccmax+cc] = (rawData[(winy+32-rr)][(winx+width-cc-2)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rr)*TS+ccmax+cc] = cfa[(rr)*TS+ccmax+cc];
}
}
if (rrmax<rr1 && ccmin>0) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
}
}
#else
for (rr=rrmin; rr < rrmax; rr++)
for (row=rr+top, cc=ccmin; cc < ccmax; cc++) {
indx1=rr*TS+cc;
cfa[indx1] = (rawData[row][cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[indx1] = cfa[indx1];
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//fill borders
if (rrmin>0) {
for (rr=0; rr<16; rr++)
for (cc=ccmin,row = 32-rr+top; cc<ccmax; cc++) {
cfa[rr*TS+cc] = (rawData[row][cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
}
}
if (rrmax<rr1) {
for (rr=0; rr<16; rr++)
for (cc=ccmin; cc<ccmax; cc++) {
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][left+cc])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
}
}
if (ccmin>0) {
for (rr=rrmin; rr<rrmax; rr++)
for (cc=0,row = rr + top; cc<16; cc++) {
cfa[rr*TS+cc] = (rawData[row][32-cc+left])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
}
}
if (ccmax<cc1) {
for (rr=rrmin; rr<rrmax; rr++)
for (cc=0; cc<16; cc++) {
cfa[rr*TS+ccmax+cc] = (rawData[(top+rr)][(winx+width-cc-2)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[rr*TS+ccmax+cc] = cfa[rr*TS+ccmax+cc];
}
}
//also, fill the image corners
if (rrmin>0 && ccmin>0) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rr)*TS+cc] = (rawData[winy+32-rr][winx+32-cc])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rr)*TS+cc] = cfa[(rr)*TS+cc];
}
}
if (rrmax<rr1 && ccmax<cc1) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rrmax+rr)*TS+ccmax+cc] = (rawData[(winy+height-rr-2)][(winx+width-cc-2)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rrmax+rr)*TS+ccmax+cc] = cfa[(rrmax+rr)*TS+ccmax+cc];
}
}
if (rrmin>0 && ccmax<cc1) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rr)*TS+ccmax+cc] = (rawData[(winy+32-rr)][(winx+width-cc-2)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rr)*TS+ccmax+cc] = cfa[(rr)*TS+ccmax+cc];
}
}
if (rrmax<rr1 && ccmin>0) {
for (rr=0; rr<16; rr++)
for (cc=0; cc<16; cc++) {
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f;
if(FC(rr,cc)==1)
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
}
}
#endif
//end of border fill
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#ifdef __SSE2__
__m128 delhv,delvv;
const __m128 epsv = _mm_set1_ps( eps );
for (rr=2; rr < rr1-2; rr++) {
for (cc=0, indx=(rr)*TS+cc; cc < cc1; cc+=4, indx+=4) {
delhv = vabsf( LVFU( cfa[indx+1] ) - LVFU( cfa[indx-1] ) );
delvv = vabsf( LVF( cfa[indx+v1] ) - LVF( cfa[indx-v1] ) );
_mm_store_ps( &dirwts1[indx], epsv + vabsf( LVFU( cfa[indx+2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVFU( cfa[indx-2] )) + delhv );
delhv = delhv * delhv;
_mm_store_ps( &dirwts0[indx], epsv + vabsf( LVF( cfa[indx+v2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVF( cfa[indx-v2] )) + delvv );
delvv = delvv * delvv;
_mm_store_ps( &delhvsqsum[indx], delhv + delvv);
}
}
#else
// horizontal and vedrtical gradient
float delh,delv;
for (rr=2; rr < rr1-2; rr++)
for (cc=2, indx=(rr)*TS+cc; cc < cc1-2; cc++, indx++) {
delh = fabsf(cfa[indx+1]-cfa[indx-1]);
delv = fabsf(cfa[indx+v1]-cfa[indx-v1]);
dirwts0[indx] = eps+fabsf(cfa[indx+v2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-v2])+delv;
dirwts1[indx] = eps+fabsf(cfa[indx+2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-2])+delh;//+fabsf(cfa[indx+2]-cfa[indx-2]);
delhvsqsum[indx] = SQR(delh) + SQR(delv);
}
#endif
#ifdef __SSE2__
__m128 Dgrbsq1pv, Dgrbsq1mv,temp2v;
for (rr=6; rr < rr1-6; rr++){
if((FC(rr,2)&1)==0) {
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) {
tempv = LC2VFU(cfa[indx+1]);
Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx+1-p1]))+SQRV(tempv-LC2VFU(cfa[indx+1+p1])));
_mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+p1])-LC2VFU(cfa[indx-p1])));
_mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+m1])-LC2VFU(cfa[indx-m1])));
Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx+1-m1]))+SQRV(tempv-LC2VFU(cfa[indx+1+m1])));
_mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv );
_mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv );
}
}
else {
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) {
tempv = LC2VFU(cfa[indx]);
Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx-p1]))+SQRV(tempv-LC2VFU(cfa[indx+p1])));
_mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+1+p1])-LC2VFU(cfa[indx+1-p1])));
_mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+1+m1])-LC2VFU(cfa[indx+1-m1])));
Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx-m1]))+SQRV(tempv-LC2VFU(cfa[indx+m1])));
_mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv );
_mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv );
}
}
}
#else
for (rr=6; rr < rr1-6; rr++){
if((FC(rr,2)&1)==0) {
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) {
delp[indx>>1] = fabsf(cfa[indx+p1]-cfa[indx-p1]);
delm[indx>>1] = fabsf(cfa[indx+m1]-cfa[indx-m1]);
Dgrbsq1p[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-p1])+SQR(cfa[indx+1]-cfa[indx+1+p1]));
Dgrbsq1m[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-m1])+SQR(cfa[indx+1]-cfa[indx+1+m1]));
}
}
else {
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) {
Dgrbsq1p[indx>>1]=(SQR(cfa[indx]-cfa[indx-p1])+SQR(cfa[indx]-cfa[indx+p1]));
Dgrbsq1m[indx>>1]=(SQR(cfa[indx]-cfa[indx-m1])+SQR(cfa[indx]-cfa[indx+m1]));
delp[indx>>1] = fabsf(cfa[indx+1+p1]-cfa[indx+1-p1]);
delm[indx>>1] = fabsf(cfa[indx+1+m1]-cfa[indx+1-m1]);
}
}
}
#endif
// end of tile initialization
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//interpolate vertical and horizontal color differences
#ifdef __SSE2__
__m128 sgnv,cruv,crdv,crlv,crrv,guhav,gdhav,glhav,grhav,hwtv,vwtv,Gintvhav,Ginthhav,guarv,gdarv,glarv,grarv;
vmask clipmask;
if( !(FC(4,4)&1) )
sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f );
else
sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f );
__m128 zd5v = _mm_set1_ps( 0.5f );
__m128 onev = _mm_set1_ps( 1.0f );
__m128 arthreshv = _mm_set1_ps( arthresh );
__m128 clip_pt8v = _mm_set1_ps( clip_pt8 );
for (rr=4; rr<rr1-4; rr++) {
sgnv = -sgnv;
for (cc=4,indx=rr*TS+cc; cc<cc1-7; cc+=4,indx+=4) {
//color ratios in each cardinal direction
cruv = LVF(cfa[indx-v1])*(LVF(dirwts0[indx-v2])+LVF(dirwts0[indx]))/(LVF(dirwts0[indx-v2])*(epsv+LVF(cfa[indx]))+LVF(dirwts0[indx])*(epsv+LVF(cfa[indx-v2])));
crdv = LVF(cfa[indx+v1])*(LVF(dirwts0[indx+v2])+LVF(dirwts0[indx]))/(LVF(dirwts0[indx+v2])*(epsv+LVF(cfa[indx]))+LVF(dirwts0[indx])*(epsv+LVF(cfa[indx+v2])));
crlv = LVFU(cfa[indx-1])*(LVFU(dirwts1[indx-2])+LVF(dirwts1[indx]))/(LVFU(dirwts1[indx-2])*(epsv+LVF(cfa[indx]))+LVF(dirwts1[indx])*(epsv+LVFU(cfa[indx-2])));
crrv = LVFU(cfa[indx+1])*(LVFU(dirwts1[indx+2])+LVF(dirwts1[indx]))/(LVFU(dirwts1[indx+2])*(epsv+LVF(cfa[indx]))+LVF(dirwts1[indx])*(epsv+LVFU(cfa[indx+2])));
guhav=LVF(cfa[indx-v1])+zd5v*(LVF(cfa[indx])-LVF(cfa[indx-v2]));
gdhav=LVF(cfa[indx+v1])+zd5v*(LVF(cfa[indx])-LVF(cfa[indx+v2]));
glhav=LVFU(cfa[indx-1])+zd5v*(LVF(cfa[indx])-LVFU(cfa[indx-2]));
grhav=LVFU(cfa[indx+1])+zd5v*(LVF(cfa[indx])-LVFU(cfa[indx+2]));
guarv = vself(vmaskf_lt(vabsf(onev-cruv), arthreshv), LVF(cfa[indx])*cruv, guhav);
gdarv = vself(vmaskf_lt(vabsf(onev-crdv), arthreshv), LVF(cfa[indx])*crdv, gdhav);
glarv = vself(vmaskf_lt(vabsf(onev-crlv), arthreshv), LVF(cfa[indx])*crlv, glhav);
grarv = vself(vmaskf_lt(vabsf(onev-crrv), arthreshv), LVF(cfa[indx])*crrv, grhav);
hwtv = LVFU(dirwts1[indx-1])/(LVFU(dirwts1[indx-1])+LVFU(dirwts1[indx+1]));
vwtv = LVF(dirwts0[indx-v1])/(LVF(dirwts0[indx+v1])+LVF(dirwts0[indx-v1]));
//interpolated G via adaptive weights of cardinal evaluations
Ginthhav = hwtv*grhav+(onev-hwtv)*glhav;
Gintvhav = vwtv*gdhav+(onev-vwtv)*guhav;
//interpolated color differences
_mm_store_ps( &hcdalt[indx], sgnv*(Ginthhav-LVF(cfa[indx])));
_mm_store_ps( &vcdalt[indx], sgnv*(Gintvhav-LVF(cfa[indx])));
clipmask = vorm( vorm( vmaskf_gt( LVF(cfa[indx]), clip_pt8v ), vmaskf_gt( Gintvhav, clip_pt8v ) ), vmaskf_gt( Ginthhav, clip_pt8v ));
guarv = vself( clipmask, guhav, guarv);
gdarv = vself( clipmask, gdhav, gdarv);
glarv = vself( clipmask, glhav, glarv);
grarv = vself( clipmask, grhav, grarv);
_mm_store_ps( &vcd[indx], vself( clipmask, LVF(vcdalt[indx]), sgnv*((vwtv*gdarv+(onev-vwtv)*guarv)-LVF(cfa[indx]))));
_mm_store_ps( &hcd[indx], vself( clipmask, LVF(hcdalt[indx]), sgnv*((hwtv*grarv+(onev-hwtv)*glarv)-LVF(cfa[indx]))));
//differences of interpolations in opposite directions
_mm_store_ps(&dgintv[indx],_mm_min_ps(SQRV(guhav-gdhav),SQRV(guarv-gdarv)));
_mm_store_ps(&dginth[indx],_mm_min_ps(SQRV(glhav-grhav),SQRV(glarv-grarv)));
}
}
#else
bool fcswitch;
for (rr=4; rr<rr1-4; rr++) {
for (cc=4,indx=rr*TS+cc,fcswitch = FC(rr,cc)&1; cc<cc1-4; cc++,indx++) {
//color ratios in each cardinal direction
cru = cfa[indx-v1]*(dirwts0[indx-v2]+dirwts0[indx])/(dirwts0[indx-v2]*(eps+cfa[indx])+dirwts0[indx]*(eps+cfa[indx-v2]));
crd = cfa[indx+v1]*(dirwts0[indx+v2]+dirwts0[indx])/(dirwts0[indx+v2]*(eps+cfa[indx])+dirwts0[indx]*(eps+cfa[indx+v2]));
crl = cfa[indx-1]*(dirwts1[indx-2]+dirwts1[indx])/(dirwts1[indx-2]*(eps+cfa[indx])+dirwts1[indx]*(eps+cfa[indx-2]));
crr = cfa[indx+1]*(dirwts1[indx+2]+dirwts1[indx])/(dirwts1[indx+2]*(eps+cfa[indx])+dirwts1[indx]*(eps+cfa[indx+2]));
guha=HCLIP(cfa[indx-v1])+xdiv2f(cfa[indx]-cfa[indx-v2]);
gdha=HCLIP(cfa[indx+v1])+xdiv2f(cfa[indx]-cfa[indx+v2]);
glha=HCLIP(cfa[indx-1])+xdiv2f(cfa[indx]-cfa[indx-2]);
grha=HCLIP(cfa[indx+1])+xdiv2f(cfa[indx]-cfa[indx+2]);
if (fabsf(1.0f-cru)<arthresh) {guar=cfa[indx]*cru;} else {guar=guha;}
if (fabsf(1.0f-crd)<arthresh) {gdar=cfa[indx]*crd;} else {gdar=gdha;}
if (fabsf(1.0f-crl)<arthresh) {glar=cfa[indx]*crl;} else {glar=glha;}
if (fabsf(1.0f-crr)<arthresh) {grar=cfa[indx]*crr;} else {grar=grha;}
hwt = dirwts1[indx-1]/(dirwts1[indx-1]+dirwts1[indx+1]);
vwt = dirwts0[indx-v1]/(dirwts0[indx+v1]+dirwts0[indx-v1]);
//interpolated G via adaptive weights of cardinal evaluations
Gintvha = vwt*gdha+(1.0f-vwt)*guha;
Ginthha = hwt*grha+(1.0f-hwt)*glha;
//interpolated color differences
if (fcswitch) {
vcd[indx] = cfa[indx]-(vwt*gdar+(1.0f-vwt)*guar);
hcd[indx] = cfa[indx]-(hwt*grar+(1.0f-hwt)*glar);
vcdalt[indx] = cfa[indx]-Gintvha;
hcdalt[indx] = cfa[indx]-Ginthha;
} else {
//interpolated color differences
vcd[indx] = (vwt*gdar+(1.0f-vwt)*guar)-cfa[indx];
hcd[indx] = (hwt*grar+(1.0f-hwt)*glar)-cfa[indx];
vcdalt[indx] = Gintvha-cfa[indx];
hcdalt[indx] = Ginthha-cfa[indx];
}
fcswitch = !fcswitch;
if (cfa[indx] > clip_pt8 || Gintvha > clip_pt8 || Ginthha > clip_pt8) {
//use HA if highlights are (nearly) clipped
guar=guha; gdar=gdha; glar=glha; grar=grha;
vcd[indx]=vcdalt[indx]; hcd[indx]=hcdalt[indx];
}
//differences of interpolations in opposite directions
dgintv[indx]=min(SQR(guha-gdha),SQR(guar-gdar));
dginth[indx]=min(SQR(glha-grha),SQR(glar-grar));
}
}
#endif
#ifdef __SSE2__
__m128 hcdvarv, vcdvarv;
__m128 hcdaltvarv,vcdaltvarv,hcdv,vcdv,hcdaltv,vcdaltv,sgn3v,Ginthv,Gintvv,hcdoldv,vcdoldv;
__m128 threev = _mm_set1_ps( 3.0f );
__m128 clip_ptv = _mm_set1_ps( clip_pt );
__m128 nsgnv;
vmask hcdmask, vcdmask,tempmask;
if( !(FC(4,4)&1) )
sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f );
else
sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f );
sgn3v = threev * sgnv;
for (rr=4; rr<rr1-4; rr++) {
nsgnv = sgnv;
sgnv = -sgnv;
sgn3v = -sgn3v;
for (cc=4,indx=rr*TS+cc,c=FC(rr,cc)&1; cc<cc1-4; cc+=4,indx+=4) {
hcdv = LVF( hcd[indx] );
hcdvarv = threev*(SQRV(LVFU(hcd[indx-2]))+SQRV(hcdv)+SQRV(LVFU(hcd[indx+2])))-SQRV(LVFU(hcd[indx-2])+hcdv+LVFU(hcd[indx+2]));
hcdaltv = LVF( hcdalt[indx] );
hcdaltvarv = threev*(SQRV(LVFU(hcdalt[indx-2]))+SQRV(hcdaltv)+SQRV(LVFU(hcdalt[indx+2])))-SQRV(LVFU(hcdalt[indx-2])+hcdaltv+LVFU(hcdalt[indx+2]));
vcdv = LVF( vcd[indx] );
vcdvarv = threev*(SQRV(LVF(vcd[indx-v2]))+SQRV(vcdv)+SQRV(LVF(vcd[indx+v2])))-SQRV(LVF(vcd[indx-v2])+vcdv+LVF(vcd[indx+v2]));
vcdaltv = LVF( vcdalt[indx] );
vcdaltvarv = threev*(SQRV(LVF(vcdalt[indx-v2]))+SQRV(vcdaltv)+SQRV(LVF(vcdalt[indx+v2])))-SQRV(LVF(vcdalt[indx-v2])+vcdaltv+LVF(vcdalt[indx+v2]));
//choose the smallest variance; this yields a smoother interpolation
hcdv = vself( vmaskf_lt( hcdaltvarv, hcdvarv ), hcdaltv, hcdv);
vcdv = vself( vmaskf_lt( vcdaltvarv, vcdvarv ), vcdaltv, vcdv);
Ginthv = sgnv * hcdv + LVF( cfa[indx] );
temp2v = sgn3v * hcdv;
hwtv = onev + temp2v / ( epsv + Ginthv + LVF( cfa[indx]));
hcdmask = vmaskf_gt( nsgnv * hcdv, ZEROV );
hcdoldv = hcdv;
tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Ginthv, LVFU(cfa[indx-1]), LVFU(cfa[indx+1]) ));
hcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx])+Ginthv)), tempv, hwtv*hcdv + (onev - hwtv)*tempv);
hcdv = vself( hcdmask, hcdv, hcdoldv );
hcdv = vself( vmaskf_gt( Ginthv, clip_ptv), tempv, hcdv);
_mm_store_ps( &hcd[indx], hcdv);
Gintvv = sgnv * vcdv + LVF( cfa[indx] );
temp2v = sgn3v * vcdv;
vwtv = onev + temp2v / ( epsv + Gintvv + LVF( cfa[indx]));
vcdmask = vmaskf_gt( nsgnv * vcdv, ZEROV );
vcdoldv = vcdv;
tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Gintvv, LVF(cfa[indx-v1]), LVF(cfa[indx+v1]) ));
vcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx])+Gintvv)), tempv, vwtv*vcdv + (onev - vwtv)*tempv);
vcdv = vself( vcdmask, vcdv, vcdoldv );
vcdv = vself( vmaskf_gt( Gintvv, clip_ptv), tempv, vcdv);
_mm_store_ps( &vcd[indx], vcdv);
_mm_storeu_ps(&cddiffsq[indx], SQRV(vcdv-hcdv));
}
}
#else
for (rr=4; rr<rr1-4; rr++) {
//for (cc=4+(FC(rr,2)&1),indx=rr*TS+cc,c=FC(rr,cc); cc<cc1-4; cc+=2,indx+=2) {
for (cc=4,indx=rr*TS+cc,c=FC(rr,cc)&1; cc<cc1-4; cc++,indx++) {
hcdvar =3.0f*(SQR(hcd[indx-2])+SQR(hcd[indx])+SQR(hcd[indx+2]))-SQR(hcd[indx-2]+hcd[indx]+hcd[indx+2]);
hcdaltvar =3.0f*(SQR(hcdalt[indx-2])+SQR(hcdalt[indx])+SQR(hcdalt[indx+2]))-SQR(hcdalt[indx-2]+hcdalt[indx]+hcdalt[indx+2]);
vcdvar =3.0f*(SQR(vcd[indx-v2])+SQR(vcd[indx])+SQR(vcd[indx+v2]))-SQR(vcd[indx-v2]+vcd[indx]+vcd[indx+v2]);
vcdaltvar =3.0f*(SQR(vcdalt[indx-v2])+SQR(vcdalt[indx])+SQR(vcdalt[indx+v2]))-SQR(vcdalt[indx-v2]+vcdalt[indx]+vcdalt[indx+v2]);
//choose the smallest variance; this yields a smoother interpolation
if (hcdaltvar<hcdvar) hcd[indx]=hcdalt[indx];
if (vcdaltvar<vcdvar) vcd[indx]=vcdalt[indx];
//bound the interpolation in regions of high saturation
if (c) {//G site
Ginth = -hcd[indx]+cfa[indx];//R or B
Gintv = -vcd[indx]+cfa[indx];//B or R
if (hcd[indx]>0) {
if (3.0f*hcd[indx] > (Ginth+cfa[indx])) {
hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];
} else {
hwt = 1.0f -3.0f*hcd[indx]/(eps+Ginth+cfa[indx]);
hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx]);
}
}
if (vcd[indx]>0) {
if (3.0f*vcd[indx] > (Gintv+cfa[indx])) {
vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
} else {
vwt = 1.0f -3.0f*vcd[indx]/(eps+Gintv+cfa[indx]);
vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]);
}
}
if (Ginth > clip_pt) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for RT implementation
if (Gintv > clip_pt) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
//if (Ginth > pre_mul[c]) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for dcraw implementation
//if (Gintv > pre_mul[c]) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
} else {//R or B site
Ginth = hcd[indx]+cfa[indx];//interpolated G
Gintv = vcd[indx]+cfa[indx];
if (hcd[indx]<0) {
if (3.0f*hcd[indx] < -(Ginth+cfa[indx])) {
hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];
} else {
hwt = 1.0f +3.0f*hcd[indx]/(eps+Ginth+cfa[indx]);
hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx]);
}
}
if (vcd[indx]<0) {
if (3.0f*vcd[indx] < -(Gintv+cfa[indx])) {
vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
} else {
vwt = 1.0f +3.0f*vcd[indx]/(eps+Gintv+cfa[indx]);
vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]);
}
}
if (Ginth > clip_pt) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for RT implementation
if (Gintv > clip_pt) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
//if (Ginth > pre_mul[c]) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for dcraw implementation
//if (Gintv > pre_mul[c]) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
cddiffsq[indx] = SQR(vcd[indx]-hcd[indx]);
}
c = !c;
}
}
#endif
#ifdef __SSE2__
__m128 uavev,davev,lavev,ravev,Dgrbvvaruv,Dgrbvvardv,Dgrbhvarlv,Dgrbhvarrv,varwtv,diffwtv,vcdvar1v,hcdvar1v;
__m128 epssqv = _mm_set1_ps( epssq );
vmask decmask;
for (rr=6; rr<rr1-6; rr++) {
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=8,indx+=8) {
//compute color difference variances in cardinal directions
tempv = LC2VFU(vcd[indx]);
uavev = tempv+LC2VFU(vcd[indx-v1])+LC2VFU(vcd[indx-v2])+LC2VFU(vcd[indx-v3]);
davev = tempv+LC2VFU(vcd[indx+v1])+LC2VFU(vcd[indx+v2])+LC2VFU(vcd[indx+v3]);
Dgrbvvaruv = SQRV(tempv-uavev)+SQRV(LC2VFU(vcd[indx-v1])-uavev)+SQRV(LC2VFU(vcd[indx-v2])-uavev)+SQRV(LC2VFU(vcd[indx-v3])-uavev);
Dgrbvvardv = SQRV(tempv-davev)+SQRV(LC2VFU(vcd[indx+v1])-davev)+SQRV(LC2VFU(vcd[indx+v2])-davev)+SQRV(LC2VFU(vcd[indx+v3])-davev);
hwtv = LC2VFU(dirwts1[indx-1])/(LC2VFU(dirwts1[indx-1])+LC2VFU(dirwts1[indx+1]));
vwtv = LC2VFU(dirwts0[indx-v1])/(LC2VFU(dirwts0[indx+v1])+LC2VFU(dirwts0[indx-v1]));
tempv = LC2VFU(hcd[indx]);
lavev = tempv+LC2VFU(hcd[indx-1])+LC2VFU(hcd[indx-2])+LC2VFU(hcd[indx-3]);
ravev = tempv+LC2VFU(hcd[indx+1])+LC2VFU(hcd[indx+2])+LC2VFU(hcd[indx+3]);
Dgrbhvarlv = SQRV(tempv-lavev)+SQRV(LC2VFU(hcd[indx-1])-lavev)+SQRV(LC2VFU(hcd[indx-2])-lavev)+SQRV(LC2VFU(hcd[indx-3])-lavev);
Dgrbhvarrv = SQRV(tempv-ravev)+SQRV(LC2VFU(hcd[indx+1])-ravev)+SQRV(LC2VFU(hcd[indx+2])-ravev)+SQRV(LC2VFU(hcd[indx+3])-ravev);
vcdvarv = epssqv+vwtv*Dgrbvvardv+(onev-vwtv)*Dgrbvvaruv;
hcdvarv = epssqv+hwtv*Dgrbhvarrv+(onev-hwtv)*Dgrbhvarlv;
//compute fluctuations in up/down and left/right interpolations of colors
Dgrbvvaruv = (LC2VFU(dgintv[indx]))+(LC2VFU(dgintv[indx-v1]))+(LC2VFU(dgintv[indx-v2]));
Dgrbvvardv = (LC2VFU(dgintv[indx]))+(LC2VFU(dgintv[indx+v1]))+(LC2VFU(dgintv[indx+v2]));
Dgrbhvarlv = (LC2VFU(dginth[indx]))+(LC2VFU(dginth[indx-1]))+(LC2VFU(dginth[indx-2]));
Dgrbhvarrv = (LC2VFU(dginth[indx]))+(LC2VFU(dginth[indx+1]))+(LC2VFU(dginth[indx+2]));
vcdvar1v = epssqv+vwtv*Dgrbvvardv+(onev-vwtv)*Dgrbvvaruv;
hcdvar1v = epssqv+hwtv*Dgrbhvarrv+(onev-hwtv)*Dgrbhvarlv;
//determine adaptive weights for G interpolation
varwtv=hcdvarv/(vcdvarv+hcdvarv);
diffwtv=hcdvar1v/(vcdvar1v+hcdvar1v);
//if both agree on interpolation direction, choose the one with strongest directional discrimination;
//otherwise, choose the u/d and l/r difference fluctuation weights
decmask = vandm( vmaskf_gt( (zd5v - varwtv) * (zd5v - diffwtv), ZEROV ), vmaskf_lt( vabsf( zd5v - diffwtv), vabsf( zd5v - varwtv) ) );
_mm_storeu_ps( &hvwt[indx>>1], vself( decmask, varwtv, diffwtv));
}
}
#else
for (rr=6; rr<rr1-6; rr++) {
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=2,indx+=2) {
//compute color difference variances in cardinal directions
uave = vcd[indx]+vcd[indx-v1]+vcd[indx-v2]+vcd[indx-v3];
dave = vcd[indx]+vcd[indx+v1]+vcd[indx+v2]+vcd[indx+v3];
lave = hcd[indx]+hcd[indx-1]+hcd[indx-2]+hcd[indx-3];
rave = hcd[indx]+hcd[indx+1]+hcd[indx+2]+hcd[indx+3];
Dgrbvvaru = SQR(vcd[indx]-uave)+SQR(vcd[indx-v1]-uave)+SQR(vcd[indx-v2]-uave)+SQR(vcd[indx-v3]-uave);
Dgrbvvard = SQR(vcd[indx]-dave)+SQR(vcd[indx+v1]-dave)+SQR(vcd[indx+v2]-dave)+SQR(vcd[indx+v3]-dave);
Dgrbhvarl = SQR(hcd[indx]-lave)+SQR(hcd[indx-1]-lave)+SQR(hcd[indx-2]-lave)+SQR(hcd[indx-3]-lave);
Dgrbhvarr = SQR(hcd[indx]-rave)+SQR(hcd[indx+1]-rave)+SQR(hcd[indx+2]-rave)+SQR(hcd[indx+3]-rave);
hwt = dirwts1[indx-1]/(dirwts1[indx-1]+dirwts1[indx+1]);
vwt = dirwts0[indx-v1]/(dirwts0[indx+v1]+dirwts0[indx-v1]);
vcdvar = epssq+vwt*Dgrbvvard+(1.0f-vwt)*Dgrbvvaru;
hcdvar = epssq+hwt*Dgrbhvarr+(1.0f-hwt)*Dgrbhvarl;
//compute fluctuations in up/down and left/right interpolations of colors
Dgrbvvaru = (dgintv[indx])+(dgintv[indx-v1])+(dgintv[indx-v2]);
Dgrbvvard = (dgintv[indx])+(dgintv[indx+v1])+(dgintv[indx+v2]);
Dgrbhvarl = (dginth[indx])+(dginth[indx-1])+(dginth[indx-2]);
Dgrbhvarr = (dginth[indx])+(dginth[indx+1])+(dginth[indx+2]);
vcdvar1 = epssq+vwt*Dgrbvvard+(1.0f-vwt)*Dgrbvvaru;
hcdvar1 = epssq+hwt*Dgrbhvarr+(1.0f-hwt)*Dgrbhvarl;
//determine adaptive weights for G interpolation
varwt=hcdvar/(vcdvar+hcdvar);
diffwt=hcdvar1/(vcdvar1+hcdvar1);
//if both agree on interpolation direction, choose the one with strongest directional discrimination;
//otherwise, choose the u/d and l/r difference fluctuation weights
if ((0.5-varwt)*(0.5-diffwt)>0 && fabsf(0.5-diffwt)<fabsf(0.5-varwt)) {hvwt[indx>>1]=varwt;} else {hvwt[indx>>1]=diffwt;}
//hvwt[indx]=varwt;
}
}
#endif
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Nyquist test
for (rr=6; rr<rr1-6; rr++)
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=2,indx+=2) {
//nyquist texture test: ask if difference of vcd compared to hcd is larger or smaller than RGGB gradients
nyqtest = (gaussodd[0]*cddiffsq[indx]+
gaussodd[1]*(cddiffsq[(indx-m1)]+cddiffsq[(indx+p1)]+
cddiffsq[(indx-p1)]+cddiffsq[(indx+m1)])+
gaussodd[2]*(cddiffsq[(indx-v2)]+cddiffsq[(indx-2)]+
cddiffsq[(indx+2)]+cddiffsq[(indx+v2)])+
gaussodd[3]*(cddiffsq[(indx-m2)]+cddiffsq[(indx+p2)]+
cddiffsq[(indx-p2)]+cddiffsq[(indx+m2)]));
nyqtest -= nyqthresh*(gaussgrad[0]*(delhvsqsum[indx])+
gaussgrad[1]*(delhvsqsum[indx-v1]+delhvsqsum[indx+1]+
delhvsqsum[indx-1]+delhvsqsum[indx+v1])+
gaussgrad[2]*(delhvsqsum[indx-m1]+delhvsqsum[indx+p1]+
delhvsqsum[indx-p1]+delhvsqsum[indx+m1])+
gaussgrad[3]*(delhvsqsum[indx-v2]+delhvsqsum[indx-2]+
delhvsqsum[indx+2]+delhvsqsum[indx+v2])+
gaussgrad[4]*(delhvsqsum[indx-2*TS-1]+delhvsqsum[indx-2*TS+1]+
delhvsqsum[indx-TS-2]+delhvsqsum[indx-TS+2]+
delhvsqsum[indx+TS-2]+delhvsqsum[indx+TS+2]+
delhvsqsum[indx+2*TS-1]+delhvsqsum[indx+2*TS+1])+
gaussgrad[5]*(delhvsqsum[indx-m2]+delhvsqsum[indx+p2]+
delhvsqsum[indx-p2]+delhvsqsum[indx+m2]));
if (nyqtest>0)
nyquist[indx>>1]=1;//nyquist=1 for nyquist region
}
unsigned int nyquisttemp;
for (rr=8; rr<rr1-8; rr++){
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
nyquisttemp=(nyquist[(indx-v2)>>1]+nyquist[(indx-m1)>>1]+nyquist[(indx+p1)>>1]+
nyquist[(indx-2)>>1]+nyquist[indx>>1]+nyquist[(indx+2)>>1]+
nyquist[(indx-p1)>>1]+nyquist[(indx+m1)>>1]+nyquist[(indx+v2)>>1]);
//if most of your neighbors are named Nyquist, it's likely that you're one too
if (nyquisttemp>4) nyquist[indx>>1]=1;
//or not
if (nyquisttemp<4) nyquist[indx>>1]=0;
}
}
// end of Nyquist test
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// in areas of Nyquist texture, do area interpolation
for (rr=8; rr<rr1-8; rr++)
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
if (nyquist[indx>>1]) {
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// area interpolation
sumh=sumv=sumsqh=sumsqv=areawt=0;
for (i=-6; i<7; i+=2)
for (j=-6; j<7; j+=2) {
indx1=(rr+i)*TS+cc+j;
if (nyquist[indx1>>1]) {
sumh += cfa[indx1]-xdiv2f(cfa[indx1-1]+cfa[indx1+1]);
sumv += cfa[indx1]-xdiv2f(cfa[indx1-v1]+cfa[indx1+v1]);
sumsqh += xdiv2f(SQR(cfa[indx1]-cfa[indx1-1])+SQR(cfa[indx1]-cfa[indx1+1]));
sumsqv += xdiv2f(SQR(cfa[indx1]-cfa[indx1-v1])+SQR(cfa[indx1]-cfa[indx1+v1]));
areawt +=1;
}
}
//horizontal and vertical color differences, and adaptive weight
hcdvar=epssq+fabsf(areawt*sumsqh-sumh*sumh);
vcdvar=epssq+fabsf(areawt*sumsqv-sumv*sumv);
hvwt[indx>>1]=hcdvar/(vcdvar+hcdvar);
// end of area interpolation
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
}
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//populate G at R/B sites
for (rr=8; rr<rr1-8; rr++)
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
//first ask if one gets more directional discrimination from nearby B/R sites
hvwtalt = xdivf(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1],2);
// hvwtalt = 0.25*(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1]);
// vo=fabsf(0.5-hvwt[indx>>1]);
// ve=fabsf(0.5-hvwtalt);
if (fabsf(0.5-hvwt[indx>>1])<fabsf(0.5-hvwtalt)) {hvwt[indx>>1]=hvwtalt;}//a better result was obtained from the neighbors
// if (vo<ve) {hvwt[indx>>1]=hvwtalt;}//a better result was obtained from the neighbors
Dgrb[0][indx>>1] = (hcd[indx]*(1.0f-hvwt[indx>>1]) + vcd[indx]*hvwt[indx>>1]);//evaluate color differences
//if (hvwt[indx]<0.5) Dgrb[indx][0]=hcd[indx];
//if (hvwt[indx]>0.5) Dgrb[indx][0]=vcd[indx];
rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1];//evaluate G (finally!)
//local curvature in G (preparation for nyquist refinement step)
if (nyquist[indx>>1]) {
Dgrb2[indx>>1].h = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-1]+rgbgreen[indx+1]));
Dgrb2[indx>>1].v = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-v1]+rgbgreen[indx+v1]));
} else {
Dgrb2[indx>>1].h = Dgrb2[indx>>1].v = 0;
}
}
//end of standard interpolation
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// refine Nyquist areas using G curvatures
for (rr=8; rr<rr1-8; rr++)
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
if (nyquist[indx>>1]) {
//local averages (over Nyquist pixels only) of G curvature squared
gvarh = epssq + (gquinc[0]*Dgrb2[indx>>1].h+
gquinc[1]*(Dgrb2[(indx-m1)>>1].h+Dgrb2[(indx+p1)>>1].h+Dgrb2[(indx-p1)>>1].h+Dgrb2[(indx+m1)>>1].h)+
gquinc[2]*(Dgrb2[(indx-v2)>>1].h+Dgrb2[(indx-2)>>1].h+Dgrb2[(indx+2)>>1].h+Dgrb2[(indx+v2)>>1].h)+
gquinc[3]*(Dgrb2[(indx-m2)>>1].h+Dgrb2[(indx+p2)>>1].h+Dgrb2[(indx-p2)>>1].h+Dgrb2[(indx+m2)>>1].h));
gvarv = epssq + (gquinc[0]*Dgrb2[indx>>1].v+
gquinc[1]*(Dgrb2[(indx-m1)>>1].v+Dgrb2[(indx+p1)>>1].v+Dgrb2[(indx-p1)>>1].v+Dgrb2[(indx+m1)>>1].v)+
gquinc[2]*(Dgrb2[(indx-v2)>>1].v+Dgrb2[(indx-2)>>1].v+Dgrb2[(indx+2)>>1].v+Dgrb2[(indx+v2)>>1].v)+
gquinc[3]*(Dgrb2[(indx-m2)>>1].v+Dgrb2[(indx+p2)>>1].v+Dgrb2[(indx-p2)>>1].v+Dgrb2[(indx+m2)>>1].v));
//use the results as weights for refined G interpolation
Dgrb[0][indx>>1] = (hcd[indx]*gvarv + vcd[indx]*gvarh)/(gvarv+gvarh);
rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1];
}
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// diagonal interpolation correction
#ifdef __SSE2__
__m128 rbsev,rbnwv,rbnev,rbswv,cfav,rbmv,rbpv,temp1v,wtv;
__m128 wtsev, wtnwv, wtnev, wtswv, rbvarmv;
__m128 gausseven0v = _mm_set1_ps(gausseven[0]);
__m128 gausseven1v = _mm_set1_ps(gausseven[1]);
__m128 twov = _mm_set1_ps(2.0f);
#endif
for (rr=8; rr<rr1-8; rr++) {
#ifdef __SSE2__
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-8; cc+=8,indx+=8,indx1+=4) {
//diagonal color ratios
cfav = LC2VFU(cfa[indx]);
temp1v = LC2VFU(cfa[indx+m1]);
temp2v = LC2VFU(cfa[indx+m2]);
rbsev = (temp1v + temp1v) / (epsv + cfav + temp2v );
rbsev = vself(vmaskf_lt(vabsf(onev - rbsev), arthreshv), cfav * rbsev, temp1v + zd5v * (cfav - temp2v));
temp1v = LC2VFU(cfa[indx-m1]);
temp2v = LC2VFU(cfa[indx-m2]);
rbnwv = (temp1v + temp1v) / (epsv + cfav + temp2v );
rbnwv = vself(vmaskf_lt(vabsf(onev - rbnwv), arthreshv), cfav * rbnwv, temp1v + zd5v * (cfav - temp2v));
temp1v = epsv + LVFU(delm[indx1]);
wtsev= temp1v+LVFU(delm[(indx+m1)>>1])+LVFU(delm[(indx+m2)>>1]);//same as for wtu,wtd,wtl,wtr
wtnwv= temp1v+LVFU(delm[(indx-m1)>>1])+LVFU(delm[(indx-m2)>>1]);
rbmv = (wtsev*rbnwv+wtnwv*rbsev)/(wtsev+wtnwv);
temp1v = ULIMV(rbmv ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1]));
wtv = twov * (cfav-rbmv)/(epsv+rbmv+cfav);
temp2v = wtv * rbmv + (onev-wtv)*temp1v;
temp2v = vself(vmaskf_lt(rbmv + rbmv, cfav), temp1v, temp2v);
temp2v = vself(vmaskf_lt(rbmv, cfav), temp2v, rbmv);
_mm_storeu_ps(&rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1])), temp2v ));
temp1v = LC2VFU(cfa[indx+p1]);
temp2v = LC2VFU(cfa[indx+p2]);
rbnev = (temp1v + temp1v) / (epsv + cfav + temp2v );
rbnev = vself(vmaskf_lt(vabsf(onev - rbnev), arthreshv), cfav * rbnev, temp1v + zd5v * (cfav - temp2v));
temp1v = LC2VFU(cfa[indx-p1]);
temp2v = LC2VFU(cfa[indx-p2]);
rbswv = (temp1v + temp1v) / (epsv + cfav + temp2v );
rbswv = vself(vmaskf_lt(vabsf(onev - rbswv), arthreshv), cfav * rbswv, temp1v + zd5v * (cfav - temp2v));
temp1v = epsv + LVFU(delp[indx1]);
wtnev= temp1v+LVFU(delp[(indx+p1)>>1])+LVFU(delp[(indx+p2)>>1]);
wtswv= temp1v+LVFU(delp[(indx-p1)>>1])+LVFU(delp[(indx-p2)>>1]);
rbpv = (wtnev*rbswv+wtswv*rbnev)/(wtnev+wtswv);
temp1v = ULIMV(rbpv ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1]));
wtv = twov * (cfav-rbpv)/(epsv+rbpv+cfav);
temp2v = wtv * rbpv + (onev-wtv)*temp1v;
temp2v = vself(vmaskf_lt(rbpv + rbpv, cfav), temp1v, temp2v);
temp2v = vself(vmaskf_lt(rbpv, cfav), temp2v, rbpv);
_mm_storeu_ps(&rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1])), temp2v ));
rbvarmv = epssqv + (gausseven0v*(LVFU(Dgrbsq1m[(indx-v1)>>1])+LVFU(Dgrbsq1m[(indx-1)>>1])+LVFU(Dgrbsq1m[(indx+1)>>1])+LVFU(Dgrbsq1m[(indx+v1)>>1])) +
gausseven1v*(LVFU(Dgrbsq1m[(indx-v2-1)>>1])+LVFU(Dgrbsq1m[(indx-v2+1)>>1])+LVFU(Dgrbsq1m[(indx-2-v1)>>1])+LVFU(Dgrbsq1m[(indx+2-v1)>>1])+
LVFU(Dgrbsq1m[(indx-2+v1)>>1])+LVFU(Dgrbsq1m[(indx+2+v1)>>1])+LVFU(Dgrbsq1m[(indx+v2-1)>>1])+LVFU(Dgrbsq1m[(indx+v2+1)>>1])));
_mm_storeu_ps(&pmwt[indx1] , rbvarmv/((epssqv + (gausseven0v*(LVFU(Dgrbsq1p[(indx-v1)>>1])+LVFU(Dgrbsq1p[(indx-1)>>1])+LVFU(Dgrbsq1p[(indx+1)>>1])+LVFU(Dgrbsq1p[(indx+v1)>>1])) +
gausseven1v*(LVFU(Dgrbsq1p[(indx-v2-1)>>1])+LVFU(Dgrbsq1p[(indx-v2+1)>>1])+LVFU(Dgrbsq1p[(indx-2-v1)>>1])+LVFU(Dgrbsq1p[(indx+2-v1)>>1])+
LVFU(Dgrbsq1p[(indx-2+v1)>>1])+LVFU(Dgrbsq1p[(indx+2+v1)>>1])+LVFU(Dgrbsq1p[(indx+v2-1)>>1])+LVFU(Dgrbsq1p[(indx+v2+1)>>1]))))+rbvarmv));
}
#else
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-8; cc+=2,indx+=2,indx1++) {
//diagonal color ratios
crse=xmul2f(cfa[indx+m1])/(eps+cfa[indx]+(cfa[indx+m2]));
crnw=xmul2f(cfa[indx-m1])/(eps+cfa[indx]+(cfa[indx-m2]));
crne=xmul2f(cfa[indx+p1])/(eps+cfa[indx]+(cfa[indx+p2]));
crsw=xmul2f(cfa[indx-p1])/(eps+cfa[indx]+(cfa[indx-p2]));
//assign B/R at R/B sites
if (fabsf(1.0f-crse)<arthresh)
rbse=cfa[indx]*crse;//use this if more precise diag interp is necessary
else
rbse=(cfa[indx+m1])+xdiv2f(cfa[indx]-cfa[indx+m2]);
if (fabsf(1.0f-crnw)<arthresh)
rbnw=cfa[indx]*crnw;
else
rbnw=(cfa[indx-m1])+xdiv2f(cfa[indx]-cfa[indx-m2]);
if (fabsf(1.0f-crne)<arthresh)
rbne=cfa[indx]*crne;
else
rbne=(cfa[indx+p1])+xdiv2f(cfa[indx]-cfa[indx+p2]);
if (fabsf(1.0f-crsw)<arthresh)
rbsw=cfa[indx]*crsw;
else
rbsw=(cfa[indx-p1])+xdiv2f(cfa[indx]-cfa[indx-p2]);
wtse= eps+delm[indx1]+delm[(indx+m1)>>1]+delm[(indx+m2)>>1];//same as for wtu,wtd,wtl,wtr
wtnw= eps+delm[indx1]+delm[(indx-m1)>>1]+delm[(indx-m2)>>1];
wtne= eps+delp[indx1]+delp[(indx+p1)>>1]+delp[(indx+p2)>>1];
wtsw= eps+delp[indx1]+delp[(indx-p1)>>1]+delp[(indx-p2)>>1];
rbm[indx1] = (wtse*rbnw+wtnw*rbse)/(wtse+wtnw);
rbp[indx1] = (wtne*rbsw+wtsw*rbne)/(wtne+wtsw);
/*
rbvarp = epssq + (gausseven[0]*(Dgrbsq1[indx-v1].p+Dgrbsq1[indx-1].p+Dgrbsq1[indx+1].p+Dgrbsq1[indx+v1].p) +
gausseven[1]*(Dgrbsq1[indx-v2-1].p+Dgrbsq1[indx-v2+1].p+Dgrbsq1[indx-2-v1].p+Dgrbsq1[indx+2-v1].p+
Dgrbsq1[indx-2+v1].p+Dgrbsq1[indx+2+v1].p+Dgrbsq1[indx+v2-1].p+Dgrbsq1[indx+v2+1].p));
*/
rbvarm = epssq + (gausseven[0]*(Dgrbsq1m[(indx-v1)>>1]+Dgrbsq1m[(indx-1)>>1]+Dgrbsq1m[(indx+1)>>1]+Dgrbsq1m[(indx+v1)>>1]) +
gausseven[1]*(Dgrbsq1m[(indx-v2-1)>>1]+Dgrbsq1m[(indx-v2+1)>>1]+Dgrbsq1m[(indx-2-v1)>>1]+Dgrbsq1m[(indx+2-v1)>>1]+
Dgrbsq1m[(indx-2+v1)>>1]+Dgrbsq1m[(indx+2+v1)>>1]+Dgrbsq1m[(indx+v2-1)>>1]+Dgrbsq1m[(indx+v2+1)>>1]));
pmwt[indx1] = rbvarm/((epssq + (gausseven[0]*(Dgrbsq1p[(indx-v1)>>1]+Dgrbsq1p[(indx-1)>>1]+Dgrbsq1p[(indx+1)>>1]+Dgrbsq1p[(indx+v1)>>1]) +
gausseven[1]*(Dgrbsq1p[(indx-v2-1)>>1]+Dgrbsq1p[(indx-v2+1)>>1]+Dgrbsq1p[(indx-2-v1)>>1]+Dgrbsq1p[(indx+2-v1)>>1]+
Dgrbsq1p[(indx-2+v1)>>1]+Dgrbsq1p[(indx+2+v1)>>1]+Dgrbsq1p[(indx+v2-1)>>1]+Dgrbsq1p[(indx+v2+1)>>1])))+rbvarm);
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//bound the interpolation in regions of high saturation
if (rbp[indx1]<cfa[indx]) {
if (xmul2f(rbp[indx1]) < cfa[indx]) {
rbp[indx1] = ULIM(rbp[indx1] ,cfa[indx-p1],cfa[indx+p1]);
} else {
pwt = xmul2f(cfa[indx]-rbp[indx1])/(eps+rbp[indx1]+cfa[indx]);
rbp[indx1]=pwt*rbp[indx1] + (1.0f-pwt)*ULIM(rbp[indx1],cfa[indx-p1],cfa[indx+p1]);
}
}
if (rbm[indx1]<cfa[indx]) {
if (xmul2f(rbm[indx1]) < cfa[indx]) {
rbm[indx1] = ULIM(rbm[indx1] ,cfa[indx-m1],cfa[indx+m1]);
} else {
mwt = xmul2f(cfa[indx]-rbm[indx1])/(eps+rbm[indx1]+cfa[indx]);
rbm[indx1]=mwt*rbm[indx1] + (1.0f-mwt)*ULIM(rbm[indx1],cfa[indx-m1],cfa[indx+m1]);
}
}
if (rbp[indx1] > clip_pt) rbp[indx1]=ULIM(rbp[indx1],cfa[indx-p1],cfa[indx+p1]);//for RT implementation
if (rbm[indx1] > clip_pt) rbm[indx1]=ULIM(rbm[indx1],cfa[indx-m1],cfa[indx+m1]);
//c=2-FC(rr,cc);//for dcraw implementation
//if (rbp[indx] > pre_mul[c]) rbp[indx]=ULIM(rbp[indx],cfa[indx-p1],cfa[indx+p1]);
//if (rbm[indx] > pre_mul[c]) rbm[indx]=ULIM(rbm[indx],cfa[indx-m1],cfa[indx+m1]);
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//rbint[indx] = 0.5*(cfa[indx] + (rbp*rbvarm+rbm*rbvarp)/(rbvarp+rbvarm));//this is R+B, interpolated
}
#endif
}
#ifdef __SSE2__
__m128 pmwtaltv;
__m128 zd25v = _mm_set1_ps(0.25f);
#endif
for (rr=10; rr<rr1-10; rr++)
#ifdef __SSE2__
for (cc=10+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-10; cc+=8,indx+=8,indx1+=4) {
//first ask if one gets more directional discrimination from nearby B/R sites
pmwtaltv = zd25v*(LVFU(pmwt[(indx-m1)>>1])+LVFU(pmwt[(indx+p1)>>1])+LVFU(pmwt[(indx-p1)>>1])+LVFU(pmwt[(indx+m1)>>1]));
tempv = LVFU(pmwt[indx1]);
tempv = vself(vmaskf_lt(vabsf(zd5v-tempv), vabsf(zd5v-pmwtaltv)), pmwtaltv, tempv);
_mm_storeu_ps( &pmwt[indx1], tempv);
_mm_storeu_ps( &rbint[indx1], zd5v * (LC2VFU(cfa[indx]) + LVFU(rbm[indx1]) * (onev - tempv) + LVFU(rbp[indx1]) * tempv));
}
#else
for (cc=10+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-10; cc+=2,indx+=2,indx1++) {
//first ask if one gets more directional discrimination from nearby B/R sites
pmwtalt = xdivf(pmwt[(indx-m1)>>1]+pmwt[(indx+p1)>>1]+pmwt[(indx-p1)>>1]+pmwt[(indx+m1)>>1],2);
if (fabsf(0.5-pmwt[indx1])<fabsf(0.5-pmwtalt)) {pmwt[indx1]=pmwtalt;}//a better result was obtained from the neighbors
rbint[indx1] = xdiv2f(cfa[indx] + rbm[indx1]*(1.0f-pmwt[indx1]) + rbp[indx1]*pmwt[indx1]);//this is R+B, interpolated
}
#endif
for (rr=12; rr<rr1-12; rr++)
for (cc=12+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-12; cc+=2,indx+=2,indx1++) {
if (fabsf(0.5-pmwt[indx>>1])<fabsf(0.5-hvwt[indx>>1]) )
continue;
//now interpolate G vertically/horizontally using R+B values
//unfortunately, since G interpolation cannot be done diagonally this may lead to color shifts
//color ratios for G interpolation
cru = cfa[indx-v1]*2.0/(eps+rbint[indx1]+rbint[(indx1-v1)]);
crd = cfa[indx+v1]*2.0/(eps+rbint[indx1]+rbint[(indx1+v1)]);
crl = cfa[indx-1]*2.0/(eps+rbint[indx1]+rbint[(indx1-1)]);
crr = cfa[indx+1]*2.0/(eps+rbint[indx1]+rbint[(indx1+1)]);
//interpolated G via adaptive ratios or Hamilton-Adams in each cardinal direction
if (fabsf(1.0f-cru)<arthresh) {gu=rbint[indx1]*cru;}
else {gu=cfa[indx-v1]+xdiv2f(rbint[indx1]-rbint[(indx1-v1)]);}
if (fabsf(1.0f-crd)<arthresh) {gd=rbint[indx1]*crd;}
else {gd=cfa[indx+v1]+xdiv2f(rbint[indx1]-rbint[(indx1+v1)]);}
if (fabsf(1.0f-crl)<arthresh) {gl=rbint[indx1]*crl;}
else {gl=cfa[indx-1]+xdiv2f(rbint[indx1]-rbint[(indx1-1)]);}
if (fabsf(1.0f-crr)<arthresh) {gr=rbint[indx1]*crr;}
else {gr=cfa[indx+1]+xdiv2f(rbint[indx1]-rbint[(indx1+1)]);}
//gu=rbint[indx]*cru;
//gd=rbint[indx]*crd;
//gl=rbint[indx]*crl;
//gr=rbint[indx]*crr;
//interpolated G via adaptive weights of cardinal evaluations
Gintv = (dirwts0[indx-v1]*gd+dirwts0[indx+v1]*gu)/(dirwts0[indx+v1]+dirwts0[indx-v1]);
Ginth = (dirwts1[indx-1]*gr+dirwts1[indx+1]*gl)/(dirwts1[indx-1]+dirwts1[indx+1]);
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//bound the interpolation in regions of high saturation
if (Gintv<rbint[indx1]) {
if (2*Gintv < rbint[indx1]) {
Gintv = ULIM(Gintv ,cfa[indx-v1],cfa[indx+v1]);
} else {
vwt = 2.0*(rbint[indx1]-Gintv)/(eps+Gintv+rbint[indx1]);
Gintv=vwt*Gintv + (1.0f-vwt)*ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
}
}
if (Ginth<rbint[indx1]) {
if (2*Ginth < rbint[indx1]) {
Ginth = ULIM(Ginth ,cfa[indx-1],cfa[indx+1]);
} else {
hwt = 2.0*(rbint[indx1]-Ginth)/(eps+Ginth+rbint[indx1]);
Ginth=hwt*Ginth + (1.0f-hwt)*ULIM(Ginth,cfa[indx-1],cfa[indx+1]);
}
}
if (Ginth > clip_pt) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]);//for RT implementation
if (Gintv > clip_pt) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
//c=FC(rr,cc);//for dcraw implementation
//if (Ginth > pre_mul[c]) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]);
//if (Gintv > pre_mul[c]) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rgbgreen[indx] = Ginth*(1.0f-hvwt[indx1]) + Gintv*hvwt[indx1];
//rgb[indx][1] = 0.5*(rgb[indx][1]+0.25*(rgb[indx-v1][1]+rgb[indx+v1][1]+rgb[indx-1][1]+rgb[indx+1][1]));
Dgrb[0][indx>>1] = rgbgreen[indx]-cfa[indx];
//rgb[indx][2-FC(rr,cc)]=2*rbint[indx]-cfa[indx];
}
//end of diagonal interpolation correction
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//fancy chrominance interpolation
//(ey,ex) is location of R site
for (rr=13-ey; rr<rr1-12; rr+=2)
for (cc=13-ex,indx1=(rr*TS+cc)>>1; cc<cc1-12; cc+=2,indx1++) {//B coset
Dgrb[1][indx1]=Dgrb[0][indx1];//split out G-B from G-R
Dgrb[0][indx1]=0;
}
#ifdef __SSE2__
// __m128 wtnwv,wtnev,wtswv,wtsev;
__m128 oned325v = _mm_set1_ps( 1.325f );
__m128 zd175v = _mm_set1_ps( 0.175f );
__m128 zd075v = _mm_set1_ps( 0.075f );
#endif
for (rr=14; rr<rr1-14; rr++)
#ifdef __SSE2__
for (cc=14+(FC(rr,2)&1),indx=rr*TS+cc,c=1-FC(rr,cc)/2; cc<cc1-14; cc+=8,indx+=8) {
wtnwv=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx+m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1])));
wtnev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1])));
wtswv=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1])));
wtsev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1])));
//Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse);
_mm_storeu_ps(&Dgrb[c][indx>>1], (wtnwv*(oned325v*LVFU(Dgrb[c][(indx-m1)>>1])-zd175v*LVFU(Dgrb[c][(indx-m3)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-v2)>>1]) )+
wtnev*(oned325v*LVFU(Dgrb[c][(indx+p1)>>1])-zd175v*LVFU(Dgrb[c][(indx+p3)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+v2)>>1]) )+
wtswv*(oned325v*LVFU(Dgrb[c][(indx-p1)>>1])-zd175v*LVFU(Dgrb[c][(indx-p3)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-v2)>>1]) )+
wtsev*(oned325v*LVFU(Dgrb[c][(indx+m1)>>1])-zd175v*LVFU(Dgrb[c][(indx+m3)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+v2)>>1]) ))/(wtnwv+wtnev+wtswv+wtsev));
}
#else
for (cc=14+(FC(rr,2)&1),indx=rr*TS+cc,c=1-FC(rr,cc)/2; cc<cc1-14; cc+=2,indx+=2) {
wtnw=1.0f/(eps+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx+m1)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx-m3)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m3)>>1]));
wtne=1.0f/(eps+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p1)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx+p3)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p3)>>1]));
wtsw=1.0f/(eps+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p1)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+m3)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p3)>>1]));
wtse=1.0f/(eps+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m1)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-p3)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx+m3)>>1]));
//Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse);
Dgrb[c][indx>>1]=(wtnw*(1.325f*Dgrb[c][(indx-m1)>>1]-0.175f*Dgrb[c][(indx-m3)>>1]-0.075f*Dgrb[c][(indx-m1-2)>>1]-0.075f*Dgrb[c][(indx-m1-v2)>>1] )+
wtne*(1.325f*Dgrb[c][(indx+p1)>>1]-0.175f*Dgrb[c][(indx+p3)>>1]-0.075f*Dgrb[c][(indx+p1+2)>>1]-0.075f*Dgrb[c][(indx+p1+v2)>>1] )+
wtsw*(1.325f*Dgrb[c][(indx-p1)>>1]-0.175f*Dgrb[c][(indx-p3)>>1]-0.075f*Dgrb[c][(indx-p1-2)>>1]-0.075f*Dgrb[c][(indx-p1-v2)>>1] )+
wtse*(1.325f*Dgrb[c][(indx+m1)>>1]-0.175f*Dgrb[c][(indx+m3)>>1]-0.075f*Dgrb[c][(indx+m1+2)>>1]-0.075f*Dgrb[c][(indx+m1+v2)>>1] ))/(wtnw+wtne+wtsw+wtse);
}
#endif
float temp;
for (rr=16; rr<rr1-16; rr++) {
if((FC(rr,2)&1)==1) {
for (cc=16,indx=rr*TS+cc,row=rr+top; cc<cc1-16-(cc1&1); cc+=2,indx++) {
col = cc + left;
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
temp);
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
temp);
indx++;
col++;
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
}
if(cc1&1) { // width of tile is odd
col = cc + left;
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
temp);
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
temp);
}
}
else {
for (cc=16,indx=rr*TS+cc,row=rr+top; cc<cc1-16-(cc1&1); cc+=2,indx++) {
col = cc + left;
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
indx++;
col++;
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
temp);
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
temp);
}
if(cc1&1) { // width of tile is odd
col = cc + left;
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
}
}
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// copy smoothed results back to image matrix
for (rr=16; rr < rr1-16; rr++){
#ifdef __SSE2__
for (row=rr+top, cc=16; cc < cc1-19; cc+=4) {
_mm_storeu_ps(&green[row][cc + left], LVF(rgbgreen[rr*TS+cc]) * c65535v);
}
#else
for (row=rr+top, cc=16; cc < cc1-16; cc++) {
col = cc + left;
indx=rr*TS+cc;
green[row][col] = ((65535.0f*rgbgreen[indx]));
//for dcraw implementation
//for (c=0; c<3; c++){
// image[indx][c] = CLIP((int)(65535.0f*rgb[rr*TS+cc][c] + 0.5f));
//}
}
#endif
}
//end of main loop
if(plistener) {
progresscounter++;
if(progresscounter % 4 == 0) {
#pragma omp critical
{
progress+=(double)4*((TS-32)*(TS-32))/(height*width);
if (progress>1.0)
{
progress=1.0;
}
plistener->setProgress(progress);
}
}
}
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// clean up
free(buffer);
}
if(plistener)
plistener->setProgress(1.0);
// done
#undef TS
}
}