1420 lines
61 KiB
C++
1420 lines
61 KiB
C++
////////////////////////////////////////////////////////////////
|
|
//
|
|
// AMaZE demosaic algorithm
|
|
// (Aliasing Minimization and Zipper Elimination)
|
|
//
|
|
// copyright (c) 2008-2010 Emil Martinec <ejmartin@uchicago.edu>
|
|
//
|
|
// incorporating ideas of Luis Sanz Rodrigues and Paul Lee
|
|
//
|
|
// code dated: May 27, 2010
|
|
//
|
|
// amaze_interpolate_RT.cc is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
//
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
#include "rtengine.h"
|
|
#include "rawimagesource.h"
|
|
#include "rt_math.h"
|
|
#include "../rtgui/multilangmgr.h"
|
|
#include "procparams.h"
|
|
#include "sleef.c"
|
|
#include "opthelper.h"
|
|
|
|
namespace rtengine {
|
|
|
|
SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh) {
|
|
|
|
#define HCLIP(x) x //is this still necessary???
|
|
//min(clip_pt,x)
|
|
|
|
int width=winw, height=winh;
|
|
|
|
|
|
const float clip_pt = 1/initialGain;
|
|
const float clip_pt8 = 0.8f/initialGain;
|
|
|
|
|
|
#define TS 160 // Tile size; the image is processed in square tiles to lower memory requirements and facilitate multi-threading
|
|
#define TSH 80 // half of Tile size
|
|
|
|
// local variables
|
|
|
|
|
|
//offset of R pixel within a Bayer quartet
|
|
int ex, ey;
|
|
|
|
//shifts of pointer value to access pixels in vertical and diagonal directions
|
|
static const int v1=TS, v2=2*TS, v3=3*TS, p1=-TS+1, p2=-2*TS+2, p3=-3*TS+3, m1=TS+1, m2=2*TS+2, m3=3*TS+3;
|
|
|
|
//tolerance to avoid dividing by zero
|
|
static const float eps=1e-5, epssq=1e-10; //tolerance to avoid dividing by zero
|
|
|
|
//adaptive ratios threshold
|
|
static const float arthresh=0.75;
|
|
//nyquist texture test threshold
|
|
static const float nyqthresh=0.5;
|
|
|
|
//gaussian on 5x5 quincunx, sigma=1.2
|
|
static const float gaussodd[4] = {0.14659727707323927f, 0.103592713382435f, 0.0732036125103057f, 0.0365543548389495f};
|
|
//gaussian on 5x5, sigma=1.2
|
|
static const float gaussgrad[6] = {0.07384411893421103f, 0.06207511968171489f, 0.0521818194747806f,
|
|
0.03687419286733595f, 0.03099732204057846f, 0.018413194161458882f};
|
|
//gaussian on 5x5 alt quincunx, sigma=1.5
|
|
static const float gausseven[2] = {0.13719494435797422f, 0.05640252782101291f};
|
|
//gaussian on quincunx grid
|
|
static const float gquinc[4] = {0.169917f, 0.108947f, 0.069855f, 0.0287182f};
|
|
|
|
volatile double progress = 0.0;
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
// Issue 1676
|
|
// Moved from inside the parallel section
|
|
if (plistener) {
|
|
plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::methodstring[RAWParams::amaze]));
|
|
plistener->setProgress (0.0);
|
|
}
|
|
// Pair of floats with an h and a v component; used below as the element type
// of the per-tile Dgrb2 buffer.
struct s_hv {
|
|
float h;
|
|
float v;
|
|
};
|
|
|
|
#pragma omp parallel
|
|
{
|
|
int progresscounter=0;
|
|
//position of top/left corner of the tile
|
|
int top, left;
|
|
// beginning of storage block for tile
|
|
char *buffer;
|
|
// green values
|
|
float (*rgbgreen);
|
|
|
|
// sum of square of horizontal gradient and square of vertical gradient
|
|
float (*delhvsqsum);
|
|
// gradient based directional weights for interpolation
|
|
float (*dirwts0);
|
|
float (*dirwts1);
|
|
|
|
// vertically interpolated color differences G-R, G-B
|
|
float (*vcd);
|
|
// horizontally interpolated color differences
|
|
float (*hcd);
|
|
// alternative vertical interpolation
|
|
float (*vcdalt);
|
|
// alternative horizontal interpolation
|
|
float (*hcdalt);
|
|
// square of average color difference
|
|
float (*cddiffsq);
|
|
// weight to give horizontal vs vertical interpolation
|
|
float (*hvwt);
|
|
// final interpolated color difference
|
|
float (*Dgrb)[TS*TSH];
|
|
// float (*Dgrb)[2];
|
|
// gradient in plus (NE/SW) direction
|
|
float (*delp);
|
|
// gradient in minus (NW/SE) direction
|
|
float (*delm);
|
|
// diagonal interpolation of R+B
|
|
float (*rbint);
|
|
// horizontal and vertical curvature of interpolated G (used to refine interpolation in Nyquist texture regions)
|
|
s_hv (*Dgrb2);
|
|
// difference between up/down interpolations of G
|
|
float (*dgintv);
|
|
// difference between left/right interpolations of G
|
|
float (*dginth);
|
|
// diagonal (plus) color difference R-B or G1-G2
|
|
// float (*Dgrbp1);
|
|
// diagonal (minus) color difference R-B or G1-G2
|
|
// float (*Dgrbm1);
|
|
float (*Dgrbsq1m);
|
|
float (*Dgrbsq1p);
|
|
// s_mp (*Dgrbsq1);
|
|
// square of diagonal color difference
|
|
// float (*Dgrbpsq1);
|
|
// square of diagonal color difference
|
|
// float (*Dgrbmsq1);
|
|
// tile raw data
|
|
float (*cfa);
|
|
// relative weight for combining plus and minus diagonal interpolations
|
|
float (*pmwt);
|
|
// interpolated color difference R-B in minus and plus direction
|
|
float (*rbm);
|
|
float (*rbp);
|
|
|
|
// nyquist texture flag 1=nyquist, 0=not nyquist
|
|
char (*nyquist);
|
|
|
|
#define CLF 1
|
|
// assign working space
|
|
buffer = (char *) calloc(22*sizeof(float)*TS*TS + sizeof(char)*TS*TSH+23*CLF*64 + 63, 1);
|
|
char *data;
|
|
data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
|
|
|
|
//merror(buffer,"amaze_interpolate()");
|
|
rgbgreen = (float (*)) data; //pointers to array
|
|
delhvsqsum = (float (*)) ((char*)rgbgreen + sizeof(float)*TS*TS + CLF*64);
|
|
dirwts0 = (float (*)) ((char*)delhvsqsum + sizeof(float)*TS*TS + CLF*64);
|
|
dirwts1 = (float (*)) ((char*)dirwts0 + sizeof(float)*TS*TS + CLF*64);
|
|
vcd = (float (*)) ((char*)dirwts1 + sizeof(float)*TS*TS + CLF*64);
|
|
hcd = (float (*)) ((char*)vcd + sizeof(float)*TS*TS + CLF*64);
|
|
vcdalt = (float (*)) ((char*)hcd + sizeof(float)*TS*TS + CLF*64);
|
|
hcdalt = (float (*)) ((char*)vcdalt + sizeof(float)*TS*TS + CLF*64);
|
|
cddiffsq = (float (*)) ((char*)hcdalt + sizeof(float)*TS*TS + CLF*64);
|
|
hvwt = (float (*)) ((char*)cddiffsq + sizeof(float)*TS*TS + CLF*64);
|
|
Dgrb = (float (*)[TS*TSH]) ((char*)hvwt + sizeof(float)*TS*TSH + CLF*64);
|
|
delp = (float (*)) ((char*)Dgrb + sizeof(float)*TS*TS + CLF*64);
|
|
delm = (float (*)) ((char*)delp + sizeof(float)*TS*TSH + CLF*64);
|
|
rbint = (float (*)) ((char*)delm + sizeof(float)*TS*TSH + CLF*64);
|
|
Dgrb2 = (s_hv (*)) ((char*)rbint + sizeof(float)*TS*TSH + CLF*64);
|
|
dgintv = (float (*)) ((char*)Dgrb2 + sizeof(float)*TS*TS + CLF*64);
|
|
dginth = (float (*)) ((char*)dgintv + sizeof(float)*TS*TS + CLF*64);
|
|
Dgrbsq1m = (float (*)) ((char*)dginth + sizeof(float)*TS*TS + CLF*64);
|
|
Dgrbsq1p = (float (*)) ((char*)Dgrbsq1m + sizeof(float)*TS*TSH + CLF*64);
|
|
cfa = (float (*)) ((char*)Dgrbsq1p + sizeof(float)*TS*TSH + CLF*64);
|
|
pmwt = (float (*)) ((char*)cfa + sizeof(float)*TS*TS + CLF*64);
|
|
rbm = (float (*)) ((char*)pmwt + sizeof(float)*TS*TSH + CLF*64);
|
|
rbp = (float (*)) ((char*)rbm + sizeof(float)*TS*TSH + CLF*64);
|
|
|
|
nyquist = (char (*)) ((char*)rbp + sizeof(float)*TS*TSH + CLF*64);
|
|
#undef CLF
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
|
//determine GRBG coset; (ey,ex) is the offset of the R subarray
|
|
if (FC(0,0)==1) {//first pixel is G
|
|
if (FC(0,1)==0) {ey=0; ex=1;} else {ey=1; ex=0;}
|
|
} else {//first pixel is R or B
|
|
if (FC(0,0)==0) {ey=0; ex=0;} else {ey=1; ex=1;}
|
|
}
|
|
|
|
// Main algorithm: Tile loop
|
|
//#pragma omp parallel for shared(rawData,height,width,red,green,blue) private(top,left) schedule(dynamic)
|
|
//code is openmp ready; just have to pull local tile variable declarations inside the tile loop
|
|
|
|
// Issue 1676
|
|
// use collapse(2) to collapse the 2 loops to one large loop, so there is better scaling
|
|
#pragma omp for schedule(dynamic) collapse(2) nowait
|
|
for (top=winy-16; top < winy+height; top += TS-32)
|
|
for (left=winx-16; left < winx+width; left += TS-32) {
|
|
memset(nyquist, 0, sizeof(char)*TS*TSH);
|
|
memset(rbint, 0, sizeof(float)*TS*TSH);
|
|
//location of tile bottom edge
|
|
int bottom = min(top+TS,winy+height+16);
|
|
//location of tile right edge
|
|
int right = min(left+TS, winx+width+16);
|
|
//tile height in rows (=TS except at the bottom edge of the image)
|
|
int rr1 = bottom - top;
|
|
//tile width in columns (=TS except at the right edge of the image)
|
|
int cc1 = right - left;
|
|
|
|
//tile vars
|
|
//counters for pixel location in the image
|
|
int row, col;
|
|
//min and max row/column in the tile
|
|
int rrmin, rrmax, ccmin, ccmax;
|
|
//counters for pixel location within the tile
|
|
int rr, cc;
|
|
//color index 0=R, 1=G, 2=B
|
|
int c;
|
|
//pointer counters within the tile
|
|
int indx, indx1;
|
|
//dummy indices
|
|
int i, j;
|
|
|
|
//color ratios in up/down/left/right directions
|
|
float cru, crd, crl, crr;
|
|
//adaptive weights for vertical/horizontal/plus/minus directions
|
|
float vwt, hwt, pwt, mwt;
|
|
//vertical and horizontal G interpolations
|
|
float Gintv, Ginth;
|
|
//G interpolated in vert/hor directions using adaptive ratios
|
|
float guar, gdar, glar, grar;
|
|
//G interpolated in vert/hor directions using Hamilton-Adams method
|
|
float guha, gdha, glha, grha;
|
|
//interpolated G from fusing left/right or up/down
|
|
float Ginthar, Ginthha, Gintvar, Gintvha;
|
|
//color difference (G-R or G-B) variance in up/down/left/right directions
|
|
float Dgrbvvaru, Dgrbvvard, Dgrbhvarl, Dgrbhvarr;
|
|
|
|
float uave, dave, lave, rave;
|
|
|
|
//color difference variances in vertical and horizontal directions
|
|
float vcdvar, hcdvar, vcdvar1, hcdvar1, hcdaltvar, vcdaltvar;
|
|
//adaptive interpolation weight using variance of color differences
|
|
float varwt; // 639 - 644
|
|
//adaptive interpolation weight using difference of left-right and up-down G interpolations
|
|
float diffwt; // 640 - 644
|
|
//alternative adaptive weight for combining horizontal/vertical interpolations
|
|
float hvwtalt; // 745 - 748
|
|
//interpolation of G in four directions
|
|
float gu, gd, gl, gr;
|
|
//variance of G in vertical/horizontal directions
|
|
float gvarh, gvarv;
|
|
|
|
//Nyquist texture test
|
|
float nyqtest; // 658 - 681
|
|
//accumulators for Nyquist texture interpolation
|
|
float sumh, sumv, sumsqh, sumsqv, areawt;
|
|
|
|
//color ratios in diagonal directions
|
|
float crse, crnw, crne, crsw;
|
|
//color differences in diagonal directions
|
|
float rbse, rbnw, rbne, rbsw;
|
|
//adaptive weights for combining diagonal interpolations
|
|
float wtse, wtnw, wtsw, wtne;
|
|
//alternate weight for combining diagonal interpolations
|
|
float pmwtalt; // 885 - 888
|
|
//variance of R-B in plus/minus directions
|
|
float rbvarm; // 843 - 848
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
// rgb from input CFA data
|
|
// rgb values should be floating point number between 0 and 1
|
|
// after white balance multipliers are applied
|
|
// a 16 pixel border is added to each side of the image
|
|
|
|
// bookkeeping for borders
|
|
if (top<winy) {rrmin=16;} else {rrmin=0;}
|
|
if (left<winx) {ccmin=16;} else {ccmin=0;}
|
|
if (bottom>(winy+height)) {rrmax=winy+height-top;} else {rrmax=rr1;}
|
|
if (right>(winx+width)) {ccmax=winx+width-left;} else {ccmax=cc1;}
|
|
|
|
#ifdef __SSE2__
|
|
const __m128 c65535v = _mm_set1_ps( 65535.0f );
|
|
__m128 tempv;
|
|
for (rr=rrmin; rr < rrmax; rr++){
|
|
for (row=rr+top, cc=ccmin; cc < ccmax-3; cc+=4) {
|
|
indx1=rr*TS+cc;
|
|
tempv = LVFU(rawData[row][cc+left]) / c65535v;
|
|
_mm_store_ps( &cfa[indx1], tempv );
|
|
_mm_store_ps( &rgbgreen[indx1], tempv );
|
|
}
|
|
for (; cc < ccmax; cc++) {
|
|
indx1=rr*TS+cc;
|
|
cfa[indx1] = (rawData[row][cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[indx1] = cfa[indx1];
|
|
|
|
}
|
|
|
|
}
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
//fill borders
|
|
if (rrmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=ccmin,row = 32-rr+top; cc<ccmax; cc++) {
|
|
cfa[rr*TS+cc] = (rawData[row][cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
|
|
}
|
|
}
|
|
if (rrmax<rr1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=ccmin; cc<ccmax; cc+=4) {
|
|
indx1 = (rrmax+rr)*TS+cc;
|
|
tempv = LVFU(rawData[(winy+height-rr-2)][left+cc]) / c65535v;
|
|
_mm_store_ps( &cfa[indx1], tempv );
|
|
_mm_store_ps( &rgbgreen[indx1], tempv );
|
|
}
|
|
}
|
|
|
|
if (ccmin>0) {
|
|
for (rr=rrmin; rr<rrmax; rr++)
|
|
for (cc=0,row = rr + top; cc<16; cc++) {
|
|
cfa[rr*TS+cc] = (rawData[row][32-cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
|
|
}
|
|
}
|
|
|
|
if (ccmax<cc1) {
|
|
for (rr=rrmin; rr<rrmax; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[rr*TS+ccmax+cc] = (rawData[(top+rr)][(winx+width-cc-2)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+ccmax+cc] = cfa[rr*TS+ccmax+cc];
|
|
}
|
|
}
|
|
//also, fill the image corners
|
|
if (rrmin>0 && ccmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc+=4) {
|
|
indx1 = (rr)*TS+cc;
|
|
tempv = LVFU(rawData[winy+32-rr][winx+32-cc]) / c65535v;
|
|
_mm_store_ps( &cfa[indx1], tempv );
|
|
_mm_store_ps( &rgbgreen[indx1], tempv );
|
|
}
|
|
}
|
|
if (rrmax<rr1 && ccmax<cc1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc+=4) {
|
|
indx1 = (rrmax+rr)*TS+ccmax+cc;
|
|
tempv = LVFU(rawData[(winy+height-rr-2)][(winx+width-cc-2)]) / c65535v;
|
|
_mm_storeu_ps( &cfa[indx1], tempv );
|
|
_mm_storeu_ps( &rgbgreen[indx1], tempv );
|
|
}
|
|
}
|
|
if (rrmin>0 && ccmax<cc1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
|
|
cfa[(rr)*TS+ccmax+cc] = (rawData[(winy+32-rr)][(winx+width-cc-2)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rr)*TS+ccmax+cc] = cfa[(rr)*TS+ccmax+cc];
|
|
}
|
|
}
|
|
if (rrmax<rr1 && ccmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
|
|
}
|
|
}
|
|
|
|
#else
|
|
for (rr=rrmin; rr < rrmax; rr++)
|
|
for (row=rr+top, cc=ccmin; cc < ccmax; cc++) {
|
|
indx1=rr*TS+cc;
|
|
cfa[indx1] = (rawData[row][cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[indx1] = cfa[indx1];
|
|
|
|
}
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
//fill borders
|
|
if (rrmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=ccmin,row = 32-rr+top; cc<ccmax; cc++) {
|
|
cfa[rr*TS+cc] = (rawData[row][cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
|
|
}
|
|
}
|
|
if (rrmax<rr1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=ccmin; cc<ccmax; cc++) {
|
|
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][left+cc])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
|
|
}
|
|
}
|
|
if (ccmin>0) {
|
|
for (rr=rrmin; rr<rrmax; rr++)
|
|
for (cc=0,row = rr + top; cc<16; cc++) {
|
|
cfa[rr*TS+cc] = (rawData[row][32-cc+left])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+cc] = cfa[rr*TS+cc];
|
|
}
|
|
}
|
|
if (ccmax<cc1) {
|
|
for (rr=rrmin; rr<rrmax; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[rr*TS+ccmax+cc] = (rawData[(top+rr)][(winx+width-cc-2)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[rr*TS+ccmax+cc] = cfa[rr*TS+ccmax+cc];
|
|
}
|
|
}
|
|
|
|
//also, fill the image corners
|
|
if (rrmin>0 && ccmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[(rr)*TS+cc] = (rawData[winy+32-rr][winx+32-cc])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rr)*TS+cc] = cfa[(rr)*TS+cc];
|
|
}
|
|
}
|
|
if (rrmax<rr1 && ccmax<cc1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[(rrmax+rr)*TS+ccmax+cc] = (rawData[(winy+height-rr-2)][(winx+width-cc-2)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rrmax+rr)*TS+ccmax+cc] = cfa[(rrmax+rr)*TS+ccmax+cc];
|
|
}
|
|
}
|
|
if (rrmin>0 && ccmax<cc1) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[(rr)*TS+ccmax+cc] = (rawData[(winy+32-rr)][(winx+width-cc-2)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rr)*TS+ccmax+cc] = cfa[(rr)*TS+ccmax+cc];
|
|
}
|
|
}
|
|
if (rrmax<rr1 && ccmin>0) {
|
|
for (rr=0; rr<16; rr++)
|
|
for (cc=0; cc<16; cc++) {
|
|
cfa[(rrmax+rr)*TS+cc] = (rawData[(winy+height-rr-2)][(winx+32-cc)])/65535.0f;
|
|
if(FC(rr,cc)==1)
|
|
rgbgreen[(rrmax+rr)*TS+cc] = cfa[(rrmax+rr)*TS+cc];
|
|
}
|
|
}
|
|
#endif
|
|
|
|
//end of border fill
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
#ifdef __SSE2__
|
|
__m128 delhv,delvv;
|
|
const __m128 epsv = _mm_set1_ps( eps );
|
|
|
|
for (rr=2; rr < rr1-2; rr++) {
|
|
for (cc=0, indx=(rr)*TS+cc; cc < cc1; cc+=4, indx+=4) {
|
|
delhv = vabsf( LVFU( cfa[indx+1] ) - LVFU( cfa[indx-1] ) );
|
|
delvv = vabsf( LVF( cfa[indx+v1] ) - LVF( cfa[indx-v1] ) );
|
|
_mm_store_ps( &dirwts1[indx], epsv + vabsf( LVFU( cfa[indx+2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVFU( cfa[indx-2] )) + delhv );
|
|
delhv = delhv * delhv;
|
|
_mm_store_ps( &dirwts0[indx], epsv + vabsf( LVF( cfa[indx+v2] ) - LVF( cfa[indx] )) + vabsf( LVF( cfa[indx] ) - LVF( cfa[indx-v2] )) + delvv );
|
|
delvv = delvv * delvv;
|
|
_mm_store_ps( &delhvsqsum[indx], delhv + delvv);
|
|
}
|
|
}
|
|
#else
|
|
// horizontal and vertical gradient
|
|
float delh,delv;
|
|
for (rr=2; rr < rr1-2; rr++)
|
|
for (cc=2, indx=(rr)*TS+cc; cc < cc1-2; cc++, indx++) {
|
|
delh = fabsf(cfa[indx+1]-cfa[indx-1]);
|
|
delv = fabsf(cfa[indx+v1]-cfa[indx-v1]);
|
|
dirwts0[indx] = eps+fabsf(cfa[indx+v2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-v2])+delv;
|
|
dirwts1[indx] = eps+fabsf(cfa[indx+2]-cfa[indx])+fabsf(cfa[indx]-cfa[indx-2])+delh;//+fabsf(cfa[indx+2]-cfa[indx-2]);
|
|
delhvsqsum[indx] = SQR(delh) + SQR(delv);
|
|
}
|
|
#endif
|
|
|
|
#ifdef __SSE2__
|
|
__m128 Dgrbsq1pv, Dgrbsq1mv,temp2v;
|
|
for (rr=6; rr < rr1-6; rr++){
|
|
if((FC(rr,2)&1)==0) {
|
|
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) {
|
|
tempv = LC2VFU(cfa[indx+1]);
|
|
Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx+1-p1]))+SQRV(tempv-LC2VFU(cfa[indx+1+p1])));
|
|
_mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+p1])-LC2VFU(cfa[indx-p1])));
|
|
_mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+m1])-LC2VFU(cfa[indx-m1])));
|
|
Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx+1-m1]))+SQRV(tempv-LC2VFU(cfa[indx+1+m1])));
|
|
_mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv );
|
|
_mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv );
|
|
}
|
|
}
|
|
else {
|
|
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=8, indx+=8) {
|
|
tempv = LC2VFU(cfa[indx]);
|
|
Dgrbsq1pv = (SQRV(tempv-LC2VFU(cfa[indx-p1]))+SQRV(tempv-LC2VFU(cfa[indx+p1])));
|
|
_mm_storeu_ps( &delp[indx>>1], vabsf(LC2VFU(cfa[indx+1+p1])-LC2VFU(cfa[indx+1-p1])));
|
|
_mm_storeu_ps( &delm[indx>>1], vabsf(LC2VFU(cfa[indx+1+m1])-LC2VFU(cfa[indx+1-m1])));
|
|
Dgrbsq1mv = (SQRV(tempv-LC2VFU(cfa[indx-m1]))+SQRV(tempv-LC2VFU(cfa[indx+m1])));
|
|
_mm_storeu_ps( &Dgrbsq1m[indx>>1], Dgrbsq1mv );
|
|
_mm_storeu_ps( &Dgrbsq1p[indx>>1], Dgrbsq1pv );
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
for (rr=6; rr < rr1-6; rr++){
|
|
if((FC(rr,2)&1)==0) {
|
|
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) {
|
|
delp[indx>>1] = fabsf(cfa[indx+p1]-cfa[indx-p1]);
|
|
delm[indx>>1] = fabsf(cfa[indx+m1]-cfa[indx-m1]);
|
|
Dgrbsq1p[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-p1])+SQR(cfa[indx+1]-cfa[indx+1+p1]));
|
|
Dgrbsq1m[indx>>1]=(SQR(cfa[indx+1]-cfa[indx+1-m1])+SQR(cfa[indx+1]-cfa[indx+1+m1]));
|
|
}
|
|
}
|
|
else {
|
|
for (cc=6, indx=(rr)*TS+cc; cc < cc1-6; cc+=2, indx+=2) {
|
|
Dgrbsq1p[indx>>1]=(SQR(cfa[indx]-cfa[indx-p1])+SQR(cfa[indx]-cfa[indx+p1]));
|
|
Dgrbsq1m[indx>>1]=(SQR(cfa[indx]-cfa[indx-m1])+SQR(cfa[indx]-cfa[indx+m1]));
|
|
delp[indx>>1] = fabsf(cfa[indx+1+p1]-cfa[indx+1-p1]);
|
|
delm[indx>>1] = fabsf(cfa[indx+1+m1]-cfa[indx+1-m1]);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// end of tile initialization
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
//interpolate vertical and horizontal color differences
|
|
|
|
#ifdef __SSE2__
|
|
__m128 sgnv,cruv,crdv,crlv,crrv,guhav,gdhav,glhav,grhav,hwtv,vwtv,Gintvhav,Ginthhav,guarv,gdarv,glarv,grarv;
|
|
vmask clipmask;
|
|
if( !(FC(4,4)&1) )
|
|
sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f );
|
|
else
|
|
sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f );
|
|
|
|
__m128 zd5v = _mm_set1_ps( 0.5f );
|
|
__m128 onev = _mm_set1_ps( 1.0f );
|
|
__m128 arthreshv = _mm_set1_ps( arthresh );
|
|
__m128 clip_pt8v = _mm_set1_ps( clip_pt8 );
|
|
|
|
for (rr=4; rr<rr1-4; rr++) {
|
|
sgnv = -sgnv;
|
|
for (cc=4,indx=rr*TS+cc; cc<cc1-7; cc+=4,indx+=4) {
|
|
//color ratios in each cardinal direction
|
|
cruv = LVF(cfa[indx-v1])*(LVF(dirwts0[indx-v2])+LVF(dirwts0[indx]))/(LVF(dirwts0[indx-v2])*(epsv+LVF(cfa[indx]))+LVF(dirwts0[indx])*(epsv+LVF(cfa[indx-v2])));
|
|
crdv = LVF(cfa[indx+v1])*(LVF(dirwts0[indx+v2])+LVF(dirwts0[indx]))/(LVF(dirwts0[indx+v2])*(epsv+LVF(cfa[indx]))+LVF(dirwts0[indx])*(epsv+LVF(cfa[indx+v2])));
|
|
crlv = LVFU(cfa[indx-1])*(LVFU(dirwts1[indx-2])+LVF(dirwts1[indx]))/(LVFU(dirwts1[indx-2])*(epsv+LVF(cfa[indx]))+LVF(dirwts1[indx])*(epsv+LVFU(cfa[indx-2])));
|
|
crrv = LVFU(cfa[indx+1])*(LVFU(dirwts1[indx+2])+LVF(dirwts1[indx]))/(LVFU(dirwts1[indx+2])*(epsv+LVF(cfa[indx]))+LVF(dirwts1[indx])*(epsv+LVFU(cfa[indx+2])));
|
|
|
|
guhav=LVF(cfa[indx-v1])+zd5v*(LVF(cfa[indx])-LVF(cfa[indx-v2]));
|
|
gdhav=LVF(cfa[indx+v1])+zd5v*(LVF(cfa[indx])-LVF(cfa[indx+v2]));
|
|
glhav=LVFU(cfa[indx-1])+zd5v*(LVF(cfa[indx])-LVFU(cfa[indx-2]));
|
|
grhav=LVFU(cfa[indx+1])+zd5v*(LVF(cfa[indx])-LVFU(cfa[indx+2]));
|
|
|
|
guarv = vself(vmaskf_lt(vabsf(onev-cruv), arthreshv), LVF(cfa[indx])*cruv, guhav);
|
|
gdarv = vself(vmaskf_lt(vabsf(onev-crdv), arthreshv), LVF(cfa[indx])*crdv, gdhav);
|
|
glarv = vself(vmaskf_lt(vabsf(onev-crlv), arthreshv), LVF(cfa[indx])*crlv, glhav);
|
|
grarv = vself(vmaskf_lt(vabsf(onev-crrv), arthreshv), LVF(cfa[indx])*crrv, grhav);
|
|
|
|
hwtv = LVFU(dirwts1[indx-1])/(LVFU(dirwts1[indx-1])+LVFU(dirwts1[indx+1]));
|
|
vwtv = LVF(dirwts0[indx-v1])/(LVF(dirwts0[indx+v1])+LVF(dirwts0[indx-v1]));
|
|
|
|
//interpolated G via adaptive weights of cardinal evaluations
|
|
Ginthhav = hwtv*grhav+(onev-hwtv)*glhav;
|
|
Gintvhav = vwtv*gdhav+(onev-vwtv)*guhav;
|
|
//interpolated color differences
|
|
|
|
_mm_store_ps( &hcdalt[indx], sgnv*(Ginthhav-LVF(cfa[indx])));
|
|
_mm_store_ps( &vcdalt[indx], sgnv*(Gintvhav-LVF(cfa[indx])));
|
|
|
|
clipmask = vorm( vorm( vmaskf_gt( LVF(cfa[indx]), clip_pt8v ), vmaskf_gt( Gintvhav, clip_pt8v ) ), vmaskf_gt( Ginthhav, clip_pt8v ));
|
|
guarv = vself( clipmask, guhav, guarv);
|
|
gdarv = vself( clipmask, gdhav, gdarv);
|
|
glarv = vself( clipmask, glhav, glarv);
|
|
grarv = vself( clipmask, grhav, grarv);
|
|
_mm_store_ps( &vcd[indx], vself( clipmask, LVF(vcdalt[indx]), sgnv*((vwtv*gdarv+(onev-vwtv)*guarv)-LVF(cfa[indx]))));
|
|
_mm_store_ps( &hcd[indx], vself( clipmask, LVF(hcdalt[indx]), sgnv*((hwtv*grarv+(onev-hwtv)*glarv)-LVF(cfa[indx]))));
|
|
//differences of interpolations in opposite directions
|
|
|
|
_mm_store_ps(&dgintv[indx],_mm_min_ps(SQRV(guhav-gdhav),SQRV(guarv-gdarv)));
|
|
_mm_store_ps(&dginth[indx],_mm_min_ps(SQRV(glhav-grhav),SQRV(glarv-grarv)));
|
|
|
|
}
|
|
}
|
|
#else
|
|
bool fcswitch;
|
|
for (rr=4; rr<rr1-4; rr++) {
|
|
for (cc=4,indx=rr*TS+cc,fcswitch = FC(rr,cc)&1; cc<cc1-4; cc++,indx++) {
|
|
|
|
//color ratios in each cardinal direction
|
|
cru = cfa[indx-v1]*(dirwts0[indx-v2]+dirwts0[indx])/(dirwts0[indx-v2]*(eps+cfa[indx])+dirwts0[indx]*(eps+cfa[indx-v2]));
|
|
crd = cfa[indx+v1]*(dirwts0[indx+v2]+dirwts0[indx])/(dirwts0[indx+v2]*(eps+cfa[indx])+dirwts0[indx]*(eps+cfa[indx+v2]));
|
|
crl = cfa[indx-1]*(dirwts1[indx-2]+dirwts1[indx])/(dirwts1[indx-2]*(eps+cfa[indx])+dirwts1[indx]*(eps+cfa[indx-2]));
|
|
crr = cfa[indx+1]*(dirwts1[indx+2]+dirwts1[indx])/(dirwts1[indx+2]*(eps+cfa[indx])+dirwts1[indx]*(eps+cfa[indx+2]));
|
|
|
|
guha=HCLIP(cfa[indx-v1])+xdiv2f(cfa[indx]-cfa[indx-v2]);
|
|
gdha=HCLIP(cfa[indx+v1])+xdiv2f(cfa[indx]-cfa[indx+v2]);
|
|
glha=HCLIP(cfa[indx-1])+xdiv2f(cfa[indx]-cfa[indx-2]);
|
|
grha=HCLIP(cfa[indx+1])+xdiv2f(cfa[indx]-cfa[indx+2]);
|
|
|
|
if (fabsf(1.0f-cru)<arthresh) {guar=cfa[indx]*cru;} else {guar=guha;}
|
|
if (fabsf(1.0f-crd)<arthresh) {gdar=cfa[indx]*crd;} else {gdar=gdha;}
|
|
if (fabsf(1.0f-crl)<arthresh) {glar=cfa[indx]*crl;} else {glar=glha;}
|
|
if (fabsf(1.0f-crr)<arthresh) {grar=cfa[indx]*crr;} else {grar=grha;}
|
|
|
|
hwt = dirwts1[indx-1]/(dirwts1[indx-1]+dirwts1[indx+1]);
|
|
vwt = dirwts0[indx-v1]/(dirwts0[indx+v1]+dirwts0[indx-v1]);
|
|
|
|
//interpolated G via adaptive weights of cardinal evaluations
|
|
Gintvha = vwt*gdha+(1.0f-vwt)*guha;
|
|
Ginthha = hwt*grha+(1.0f-hwt)*glha;
|
|
//interpolated color differences
|
|
if (fcswitch) {
|
|
vcd[indx] = cfa[indx]-(vwt*gdar+(1.0f-vwt)*guar);
|
|
hcd[indx] = cfa[indx]-(hwt*grar+(1.0f-hwt)*glar);
|
|
vcdalt[indx] = cfa[indx]-Gintvha;
|
|
hcdalt[indx] = cfa[indx]-Ginthha;
|
|
} else {
|
|
//interpolated color differences
|
|
vcd[indx] = (vwt*gdar+(1.0f-vwt)*guar)-cfa[indx];
|
|
hcd[indx] = (hwt*grar+(1.0f-hwt)*glar)-cfa[indx];
|
|
vcdalt[indx] = Gintvha-cfa[indx];
|
|
hcdalt[indx] = Ginthha-cfa[indx];
|
|
}
|
|
fcswitch = !fcswitch;
|
|
|
|
if (cfa[indx] > clip_pt8 || Gintvha > clip_pt8 || Ginthha > clip_pt8) {
|
|
//use HA if highlights are (nearly) clipped
|
|
guar=guha; gdar=gdha; glar=glha; grar=grha;
|
|
vcd[indx]=vcdalt[indx]; hcd[indx]=hcdalt[indx];
|
|
}
|
|
|
|
//differences of interpolations in opposite directions
|
|
dgintv[indx]=min(SQR(guha-gdha),SQR(guar-gdar));
|
|
dginth[indx]=min(SQR(glha-grha),SQR(glar-grar));
|
|
|
|
}
|
|
|
|
|
|
}
|
|
#endif
|
|
|
|
#ifdef __SSE2__
|
|
__m128 hcdvarv, vcdvarv;
|
|
__m128 hcdaltvarv,vcdaltvarv,hcdv,vcdv,hcdaltv,vcdaltv,sgn3v,Ginthv,Gintvv,hcdoldv,vcdoldv;
|
|
__m128 threev = _mm_set1_ps( 3.0f );
|
|
__m128 clip_ptv = _mm_set1_ps( clip_pt );
|
|
__m128 nsgnv;
|
|
vmask hcdmask, vcdmask,tempmask;
|
|
|
|
if( !(FC(4,4)&1) )
|
|
sgnv = _mm_set_ps( 1.0f, -1.0f, 1.0f, -1.0f );
|
|
else
|
|
sgnv = _mm_set_ps( -1.0f, 1.0f, -1.0f, 1.0f );
|
|
|
|
sgn3v = threev * sgnv;
|
|
for (rr=4; rr<rr1-4; rr++) {
|
|
nsgnv = sgnv;
|
|
sgnv = -sgnv;
|
|
sgn3v = -sgn3v;
|
|
for (cc=4,indx=rr*TS+cc,c=FC(rr,cc)&1; cc<cc1-4; cc+=4,indx+=4) {
|
|
hcdv = LVF( hcd[indx] );
|
|
hcdvarv = threev*(SQRV(LVFU(hcd[indx-2]))+SQRV(hcdv)+SQRV(LVFU(hcd[indx+2])))-SQRV(LVFU(hcd[indx-2])+hcdv+LVFU(hcd[indx+2]));
|
|
hcdaltv = LVF( hcdalt[indx] );
|
|
hcdaltvarv = threev*(SQRV(LVFU(hcdalt[indx-2]))+SQRV(hcdaltv)+SQRV(LVFU(hcdalt[indx+2])))-SQRV(LVFU(hcdalt[indx-2])+hcdaltv+LVFU(hcdalt[indx+2]));
|
|
vcdv = LVF( vcd[indx] );
|
|
vcdvarv = threev*(SQRV(LVF(vcd[indx-v2]))+SQRV(vcdv)+SQRV(LVF(vcd[indx+v2])))-SQRV(LVF(vcd[indx-v2])+vcdv+LVF(vcd[indx+v2]));
|
|
vcdaltv = LVF( vcdalt[indx] );
|
|
vcdaltvarv = threev*(SQRV(LVF(vcdalt[indx-v2]))+SQRV(vcdaltv)+SQRV(LVF(vcdalt[indx+v2])))-SQRV(LVF(vcdalt[indx-v2])+vcdaltv+LVF(vcdalt[indx+v2]));
|
|
//choose the smallest variance; this yields a smoother interpolation
|
|
hcdv = vself( vmaskf_lt( hcdaltvarv, hcdvarv ), hcdaltv, hcdv);
|
|
vcdv = vself( vmaskf_lt( vcdaltvarv, vcdvarv ), vcdaltv, vcdv);
|
|
|
|
Ginthv = sgnv * hcdv + LVF( cfa[indx] );
|
|
temp2v = sgn3v * hcdv;
|
|
hwtv = onev + temp2v / ( epsv + Ginthv + LVF( cfa[indx]));
|
|
hcdmask = vmaskf_gt( nsgnv * hcdv, ZEROV );
|
|
hcdoldv = hcdv;
|
|
tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Ginthv, LVFU(cfa[indx-1]), LVFU(cfa[indx+1]) ));
|
|
hcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx])+Ginthv)), tempv, hwtv*hcdv + (onev - hwtv)*tempv);
|
|
hcdv = vself( hcdmask, hcdv, hcdoldv );
|
|
hcdv = vself( vmaskf_gt( Ginthv, clip_ptv), tempv, hcdv);
|
|
_mm_store_ps( &hcd[indx], hcdv);
|
|
|
|
Gintvv = sgnv * vcdv + LVF( cfa[indx] );
|
|
temp2v = sgn3v * vcdv;
|
|
vwtv = onev + temp2v / ( epsv + Gintvv + LVF( cfa[indx]));
|
|
vcdmask = vmaskf_gt( nsgnv * vcdv, ZEROV );
|
|
vcdoldv = vcdv;
|
|
tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Gintvv, LVF(cfa[indx-v1]), LVF(cfa[indx+v1]) ));
|
|
vcdv = vself( vmaskf_lt( (temp2v), -(LVF(cfa[indx])+Gintvv)), tempv, vwtv*vcdv + (onev - vwtv)*tempv);
|
|
vcdv = vself( vcdmask, vcdv, vcdoldv );
|
|
vcdv = vself( vmaskf_gt( Gintvv, clip_ptv), tempv, vcdv);
|
|
_mm_store_ps( &vcd[indx], vcdv);
|
|
_mm_storeu_ps(&cddiffsq[indx], SQRV(vcdv-hcdv));
|
|
}
|
|
|
|
}
|
|
#else
|
|
for (rr=4; rr<rr1-4; rr++) {
|
|
//for (cc=4+(FC(rr,2)&1),indx=rr*TS+cc,c=FC(rr,cc); cc<cc1-4; cc+=2,indx+=2) {
|
|
for (cc=4,indx=rr*TS+cc,c=FC(rr,cc)&1; cc<cc1-4; cc++,indx++) {
|
|
hcdvar =3.0f*(SQR(hcd[indx-2])+SQR(hcd[indx])+SQR(hcd[indx+2]))-SQR(hcd[indx-2]+hcd[indx]+hcd[indx+2]);
|
|
hcdaltvar =3.0f*(SQR(hcdalt[indx-2])+SQR(hcdalt[indx])+SQR(hcdalt[indx+2]))-SQR(hcdalt[indx-2]+hcdalt[indx]+hcdalt[indx+2]);
|
|
vcdvar =3.0f*(SQR(vcd[indx-v2])+SQR(vcd[indx])+SQR(vcd[indx+v2]))-SQR(vcd[indx-v2]+vcd[indx]+vcd[indx+v2]);
|
|
vcdaltvar =3.0f*(SQR(vcdalt[indx-v2])+SQR(vcdalt[indx])+SQR(vcdalt[indx+v2]))-SQR(vcdalt[indx-v2]+vcdalt[indx]+vcdalt[indx+v2]);
|
|
//choose the smallest variance; this yields a smoother interpolation
|
|
if (hcdaltvar<hcdvar) hcd[indx]=hcdalt[indx];
|
|
if (vcdaltvar<vcdvar) vcd[indx]=vcdalt[indx];
|
|
|
|
//bound the interpolation in regions of high saturation
|
|
if (c) {//G site
|
|
Ginth = -hcd[indx]+cfa[indx];//R or B
|
|
Gintv = -vcd[indx]+cfa[indx];//B or R
|
|
|
|
if (hcd[indx]>0) {
|
|
if (3.0f*hcd[indx] > (Ginth+cfa[indx])) {
|
|
hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];
|
|
} else {
|
|
hwt = 1.0f -3.0f*hcd[indx]/(eps+Ginth+cfa[indx]);
|
|
hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx]);
|
|
}
|
|
}
|
|
if (vcd[indx]>0) {
|
|
if (3.0f*vcd[indx] > (Gintv+cfa[indx])) {
|
|
vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
|
|
} else {
|
|
vwt = 1.0f -3.0f*vcd[indx]/(eps+Gintv+cfa[indx]);
|
|
vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx]);
|
|
}
|
|
}
|
|
|
|
if (Ginth > clip_pt) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for RT implementation
|
|
if (Gintv > clip_pt) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
|
|
//if (Ginth > pre_mul[c]) hcd[indx]=-ULIM(Ginth,cfa[indx-1],cfa[indx+1])+cfa[indx];//for dcraw implementation
|
|
//if (Gintv > pre_mul[c]) vcd[indx]=-ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])+cfa[indx];
|
|
|
|
} else {//R or B site
|
|
|
|
Ginth = hcd[indx]+cfa[indx];//interpolated G
|
|
Gintv = vcd[indx]+cfa[indx];
|
|
|
|
if (hcd[indx]<0) {
|
|
if (3.0f*hcd[indx] < -(Ginth+cfa[indx])) {
|
|
hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];
|
|
} else {
|
|
hwt = 1.0f +3.0f*hcd[indx]/(eps+Ginth+cfa[indx]);
|
|
hcd[indx]=hwt*hcd[indx] + (1.0f-hwt)*(ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx]);
|
|
}
|
|
}
|
|
if (vcd[indx]<0) {
|
|
if (3.0f*vcd[indx] < -(Gintv+cfa[indx])) {
|
|
vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
|
|
} else {
|
|
vwt = 1.0f +3.0f*vcd[indx]/(eps+Gintv+cfa[indx]);
|
|
vcd[indx]=vwt*vcd[indx] + (1.0f-vwt)*(ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx]);
|
|
}
|
|
}
|
|
|
|
if (Ginth > clip_pt) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for RT implementation
|
|
if (Gintv > clip_pt) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
|
|
//if (Ginth > pre_mul[c]) hcd[indx]=ULIM(Ginth,cfa[indx-1],cfa[indx+1])-cfa[indx];//for dcraw implementation
|
|
//if (Gintv > pre_mul[c]) vcd[indx]=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1])-cfa[indx];
|
|
cddiffsq[indx] = SQR(vcd[indx]-hcd[indx]);
|
|
}
|
|
c = !c;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef __SSE2__
|
|
__m128 uavev,davev,lavev,ravev,Dgrbvvaruv,Dgrbvvardv,Dgrbhvarlv,Dgrbhvarrv,varwtv,diffwtv,vcdvar1v,hcdvar1v;
|
|
__m128 epssqv = _mm_set1_ps( epssq );
|
|
vmask decmask;
|
|
for (rr=6; rr<rr1-6; rr++) {
|
|
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=8,indx+=8) {
|
|
//compute color difference variances in cardinal directions
|
|
tempv = LC2VFU(vcd[indx]);
|
|
uavev = tempv+LC2VFU(vcd[indx-v1])+LC2VFU(vcd[indx-v2])+LC2VFU(vcd[indx-v3]);
|
|
davev = tempv+LC2VFU(vcd[indx+v1])+LC2VFU(vcd[indx+v2])+LC2VFU(vcd[indx+v3]);
|
|
Dgrbvvaruv = SQRV(tempv-uavev)+SQRV(LC2VFU(vcd[indx-v1])-uavev)+SQRV(LC2VFU(vcd[indx-v2])-uavev)+SQRV(LC2VFU(vcd[indx-v3])-uavev);
|
|
Dgrbvvardv = SQRV(tempv-davev)+SQRV(LC2VFU(vcd[indx+v1])-davev)+SQRV(LC2VFU(vcd[indx+v2])-davev)+SQRV(LC2VFU(vcd[indx+v3])-davev);
|
|
|
|
hwtv = LC2VFU(dirwts1[indx-1])/(LC2VFU(dirwts1[indx-1])+LC2VFU(dirwts1[indx+1]));
|
|
vwtv = LC2VFU(dirwts0[indx-v1])/(LC2VFU(dirwts0[indx+v1])+LC2VFU(dirwts0[indx-v1]));
|
|
|
|
tempv = LC2VFU(hcd[indx]);
|
|
lavev = tempv+LC2VFU(hcd[indx-1])+LC2VFU(hcd[indx-2])+LC2VFU(hcd[indx-3]);
|
|
ravev = tempv+LC2VFU(hcd[indx+1])+LC2VFU(hcd[indx+2])+LC2VFU(hcd[indx+3]);
|
|
Dgrbhvarlv = SQRV(tempv-lavev)+SQRV(LC2VFU(hcd[indx-1])-lavev)+SQRV(LC2VFU(hcd[indx-2])-lavev)+SQRV(LC2VFU(hcd[indx-3])-lavev);
|
|
Dgrbhvarrv = SQRV(tempv-ravev)+SQRV(LC2VFU(hcd[indx+1])-ravev)+SQRV(LC2VFU(hcd[indx+2])-ravev)+SQRV(LC2VFU(hcd[indx+3])-ravev);
|
|
|
|
|
|
vcdvarv = epssqv+vwtv*Dgrbvvardv+(onev-vwtv)*Dgrbvvaruv;
|
|
hcdvarv = epssqv+hwtv*Dgrbhvarrv+(onev-hwtv)*Dgrbhvarlv;
|
|
|
|
//compute fluctuations in up/down and left/right interpolations of colors
|
|
Dgrbvvaruv = (LC2VFU(dgintv[indx]))+(LC2VFU(dgintv[indx-v1]))+(LC2VFU(dgintv[indx-v2]));
|
|
Dgrbvvardv = (LC2VFU(dgintv[indx]))+(LC2VFU(dgintv[indx+v1]))+(LC2VFU(dgintv[indx+v2]));
|
|
Dgrbhvarlv = (LC2VFU(dginth[indx]))+(LC2VFU(dginth[indx-1]))+(LC2VFU(dginth[indx-2]));
|
|
Dgrbhvarrv = (LC2VFU(dginth[indx]))+(LC2VFU(dginth[indx+1]))+(LC2VFU(dginth[indx+2]));
|
|
|
|
vcdvar1v = epssqv+vwtv*Dgrbvvardv+(onev-vwtv)*Dgrbvvaruv;
|
|
hcdvar1v = epssqv+hwtv*Dgrbhvarrv+(onev-hwtv)*Dgrbhvarlv;
|
|
|
|
//determine adaptive weights for G interpolation
|
|
varwtv=hcdvarv/(vcdvarv+hcdvarv);
|
|
diffwtv=hcdvar1v/(vcdvar1v+hcdvar1v);
|
|
|
|
//if both agree on interpolation direction, choose the one with strongest directional discrimination;
|
|
//otherwise, choose the u/d and l/r difference fluctuation weights
|
|
decmask = vandm( vmaskf_gt( (zd5v - varwtv) * (zd5v - diffwtv), ZEROV ), vmaskf_lt( vabsf( zd5v - diffwtv), vabsf( zd5v - varwtv) ) );
|
|
_mm_storeu_ps( &hvwt[indx>>1], vself( decmask, varwtv, diffwtv));
|
|
}
|
|
}
|
|
#else
|
|
for (rr=6; rr<rr1-6; rr++) {
|
|
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=2,indx+=2) {
|
|
|
|
//compute color difference variances in cardinal directions
|
|
|
|
uave = vcd[indx]+vcd[indx-v1]+vcd[indx-v2]+vcd[indx-v3];
|
|
dave = vcd[indx]+vcd[indx+v1]+vcd[indx+v2]+vcd[indx+v3];
|
|
lave = hcd[indx]+hcd[indx-1]+hcd[indx-2]+hcd[indx-3];
|
|
rave = hcd[indx]+hcd[indx+1]+hcd[indx+2]+hcd[indx+3];
|
|
|
|
Dgrbvvaru = SQR(vcd[indx]-uave)+SQR(vcd[indx-v1]-uave)+SQR(vcd[indx-v2]-uave)+SQR(vcd[indx-v3]-uave);
|
|
Dgrbvvard = SQR(vcd[indx]-dave)+SQR(vcd[indx+v1]-dave)+SQR(vcd[indx+v2]-dave)+SQR(vcd[indx+v3]-dave);
|
|
Dgrbhvarl = SQR(hcd[indx]-lave)+SQR(hcd[indx-1]-lave)+SQR(hcd[indx-2]-lave)+SQR(hcd[indx-3]-lave);
|
|
Dgrbhvarr = SQR(hcd[indx]-rave)+SQR(hcd[indx+1]-rave)+SQR(hcd[indx+2]-rave)+SQR(hcd[indx+3]-rave);
|
|
|
|
hwt = dirwts1[indx-1]/(dirwts1[indx-1]+dirwts1[indx+1]);
|
|
vwt = dirwts0[indx-v1]/(dirwts0[indx+v1]+dirwts0[indx-v1]);
|
|
|
|
vcdvar = epssq+vwt*Dgrbvvard+(1.0f-vwt)*Dgrbvvaru;
|
|
hcdvar = epssq+hwt*Dgrbhvarr+(1.0f-hwt)*Dgrbhvarl;
|
|
|
|
//compute fluctuations in up/down and left/right interpolations of colors
|
|
Dgrbvvaru = (dgintv[indx])+(dgintv[indx-v1])+(dgintv[indx-v2]);
|
|
Dgrbvvard = (dgintv[indx])+(dgintv[indx+v1])+(dgintv[indx+v2]);
|
|
Dgrbhvarl = (dginth[indx])+(dginth[indx-1])+(dginth[indx-2]);
|
|
Dgrbhvarr = (dginth[indx])+(dginth[indx+1])+(dginth[indx+2]);
|
|
|
|
vcdvar1 = epssq+vwt*Dgrbvvard+(1.0f-vwt)*Dgrbvvaru;
|
|
hcdvar1 = epssq+hwt*Dgrbhvarr+(1.0f-hwt)*Dgrbhvarl;
|
|
|
|
//determine adaptive weights for G interpolation
|
|
varwt=hcdvar/(vcdvar+hcdvar);
|
|
diffwt=hcdvar1/(vcdvar1+hcdvar1);
|
|
|
|
//if both agree on interpolation direction, choose the one with strongest directional discrimination;
|
|
//otherwise, choose the u/d and l/r difference fluctuation weights
|
|
if ((0.5-varwt)*(0.5-diffwt)>0 && fabsf(0.5-diffwt)<fabsf(0.5-varwt)) {hvwt[indx>>1]=varwt;} else {hvwt[indx>>1]=diffwt;}
|
|
|
|
//hvwt[indx]=varwt;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
// Nyquist test
|
|
for (rr=6; rr<rr1-6; rr++)
|
|
for (cc=6+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-6; cc+=2,indx+=2) {
|
|
|
|
//nyquist texture test: ask if difference of vcd compared to hcd is larger or smaller than RGGB gradients
|
|
nyqtest = (gaussodd[0]*cddiffsq[indx]+
|
|
gaussodd[1]*(cddiffsq[(indx-m1)]+cddiffsq[(indx+p1)]+
|
|
cddiffsq[(indx-p1)]+cddiffsq[(indx+m1)])+
|
|
gaussodd[2]*(cddiffsq[(indx-v2)]+cddiffsq[(indx-2)]+
|
|
cddiffsq[(indx+2)]+cddiffsq[(indx+v2)])+
|
|
gaussodd[3]*(cddiffsq[(indx-m2)]+cddiffsq[(indx+p2)]+
|
|
cddiffsq[(indx-p2)]+cddiffsq[(indx+m2)]));
|
|
|
|
nyqtest -= nyqthresh*(gaussgrad[0]*(delhvsqsum[indx])+
|
|
gaussgrad[1]*(delhvsqsum[indx-v1]+delhvsqsum[indx+1]+
|
|
delhvsqsum[indx-1]+delhvsqsum[indx+v1])+
|
|
gaussgrad[2]*(delhvsqsum[indx-m1]+delhvsqsum[indx+p1]+
|
|
delhvsqsum[indx-p1]+delhvsqsum[indx+m1])+
|
|
gaussgrad[3]*(delhvsqsum[indx-v2]+delhvsqsum[indx-2]+
|
|
delhvsqsum[indx+2]+delhvsqsum[indx+v2])+
|
|
gaussgrad[4]*(delhvsqsum[indx-2*TS-1]+delhvsqsum[indx-2*TS+1]+
|
|
delhvsqsum[indx-TS-2]+delhvsqsum[indx-TS+2]+
|
|
delhvsqsum[indx+TS-2]+delhvsqsum[indx+TS+2]+
|
|
delhvsqsum[indx+2*TS-1]+delhvsqsum[indx+2*TS+1])+
|
|
gaussgrad[5]*(delhvsqsum[indx-m2]+delhvsqsum[indx+p2]+
|
|
delhvsqsum[indx-p2]+delhvsqsum[indx+m2]));
|
|
|
|
|
|
if (nyqtest>0)
|
|
nyquist[indx>>1]=1;//nyquist=1 for nyquist region
|
|
}
|
|
|
|
unsigned int nyquisttemp;
|
|
for (rr=8; rr<rr1-8; rr++){
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
|
|
|
|
nyquisttemp=(nyquist[(indx-v2)>>1]+nyquist[(indx-m1)>>1]+nyquist[(indx+p1)>>1]+
|
|
nyquist[(indx-2)>>1]+nyquist[indx>>1]+nyquist[(indx+2)>>1]+
|
|
nyquist[(indx-p1)>>1]+nyquist[(indx+m1)>>1]+nyquist[(indx+v2)>>1]);
|
|
//if most of your neighbors are named Nyquist, it's likely that you're one too
|
|
if (nyquisttemp>4) nyquist[indx>>1]=1;
|
|
//or not
|
|
if (nyquisttemp<4) nyquist[indx>>1]=0;
|
|
}
|
|
}
|
|
// end of Nyquist test
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
// in areas of Nyquist texture, do area interpolation
|
|
for (rr=8; rr<rr1-8; rr++)
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
|
|
|
|
if (nyquist[indx>>1]) {
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
// area interpolation
|
|
|
|
sumh=sumv=sumsqh=sumsqv=areawt=0;
|
|
for (i=-6; i<7; i+=2)
|
|
for (j=-6; j<7; j+=2) {
|
|
indx1=(rr+i)*TS+cc+j;
|
|
if (nyquist[indx1>>1]) {
|
|
sumh += cfa[indx1]-xdiv2f(cfa[indx1-1]+cfa[indx1+1]);
|
|
sumv += cfa[indx1]-xdiv2f(cfa[indx1-v1]+cfa[indx1+v1]);
|
|
sumsqh += xdiv2f(SQR(cfa[indx1]-cfa[indx1-1])+SQR(cfa[indx1]-cfa[indx1+1]));
|
|
sumsqv += xdiv2f(SQR(cfa[indx1]-cfa[indx1-v1])+SQR(cfa[indx1]-cfa[indx1+v1]));
|
|
areawt +=1;
|
|
}
|
|
}
|
|
|
|
//horizontal and vertical color differences, and adaptive weight
|
|
hcdvar=epssq+fabsf(areawt*sumsqh-sumh*sumh);
|
|
vcdvar=epssq+fabsf(areawt*sumsqv-sumv*sumv);
|
|
hvwt[indx>>1]=hcdvar/(vcdvar+hcdvar);
|
|
|
|
// end of area interpolation
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
}
|
|
}
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
//populate G at R/B sites
|
|
for (rr=8; rr<rr1-8; rr++)
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
|
|
|
|
//first ask if one gets more directional discrimination from nearby B/R sites
|
|
hvwtalt = xdivf(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1],2);
|
|
// hvwtalt = 0.25*(hvwt[(indx-m1)>>1]+hvwt[(indx+p1)>>1]+hvwt[(indx-p1)>>1]+hvwt[(indx+m1)>>1]);
|
|
// vo=fabsf(0.5-hvwt[indx>>1]);
|
|
// ve=fabsf(0.5-hvwtalt);
|
|
if (fabsf(0.5-hvwt[indx>>1])<fabsf(0.5-hvwtalt)) {hvwt[indx>>1]=hvwtalt;}//a better result was obtained from the neighbors
|
|
// if (vo<ve) {hvwt[indx>>1]=hvwtalt;}//a better result was obtained from the neighbors
|
|
|
|
|
|
|
|
Dgrb[0][indx>>1] = (hcd[indx]*(1.0f-hvwt[indx>>1]) + vcd[indx]*hvwt[indx>>1]);//evaluate color differences
|
|
//if (hvwt[indx]<0.5) Dgrb[indx][0]=hcd[indx];
|
|
//if (hvwt[indx]>0.5) Dgrb[indx][0]=vcd[indx];
|
|
rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1];//evaluate G (finally!)
|
|
|
|
//local curvature in G (preparation for nyquist refinement step)
|
|
if (nyquist[indx>>1]) {
|
|
Dgrb2[indx>>1].h = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-1]+rgbgreen[indx+1]));
|
|
Dgrb2[indx>>1].v = SQR(rgbgreen[indx] - xdiv2f(rgbgreen[indx-v1]+rgbgreen[indx+v1]));
|
|
} else {
|
|
Dgrb2[indx>>1].h = Dgrb2[indx>>1].v = 0;
|
|
}
|
|
}
|
|
|
|
//end of standard interpolation
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
// refine Nyquist areas using G curvatures
|
|
|
|
for (rr=8; rr<rr1-8; rr++)
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc; cc<cc1-8; cc+=2,indx+=2) {
|
|
|
|
if (nyquist[indx>>1]) {
|
|
//local averages (over Nyquist pixels only) of G curvature squared
|
|
gvarh = epssq + (gquinc[0]*Dgrb2[indx>>1].h+
|
|
gquinc[1]*(Dgrb2[(indx-m1)>>1].h+Dgrb2[(indx+p1)>>1].h+Dgrb2[(indx-p1)>>1].h+Dgrb2[(indx+m1)>>1].h)+
|
|
gquinc[2]*(Dgrb2[(indx-v2)>>1].h+Dgrb2[(indx-2)>>1].h+Dgrb2[(indx+2)>>1].h+Dgrb2[(indx+v2)>>1].h)+
|
|
gquinc[3]*(Dgrb2[(indx-m2)>>1].h+Dgrb2[(indx+p2)>>1].h+Dgrb2[(indx-p2)>>1].h+Dgrb2[(indx+m2)>>1].h));
|
|
gvarv = epssq + (gquinc[0]*Dgrb2[indx>>1].v+
|
|
gquinc[1]*(Dgrb2[(indx-m1)>>1].v+Dgrb2[(indx+p1)>>1].v+Dgrb2[(indx-p1)>>1].v+Dgrb2[(indx+m1)>>1].v)+
|
|
gquinc[2]*(Dgrb2[(indx-v2)>>1].v+Dgrb2[(indx-2)>>1].v+Dgrb2[(indx+2)>>1].v+Dgrb2[(indx+v2)>>1].v)+
|
|
gquinc[3]*(Dgrb2[(indx-m2)>>1].v+Dgrb2[(indx+p2)>>1].v+Dgrb2[(indx-p2)>>1].v+Dgrb2[(indx+m2)>>1].v));
|
|
//use the results as weights for refined G interpolation
|
|
Dgrb[0][indx>>1] = (hcd[indx]*gvarv + vcd[indx]*gvarh)/(gvarv+gvarh);
|
|
rgbgreen[indx] = cfa[indx] + Dgrb[0][indx>>1];
|
|
}
|
|
}
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
// diagonal interpolation correction
|
|
|
|
#ifdef __SSE2__
|
|
__m128 rbsev,rbnwv,rbnev,rbswv,cfav,rbmv,rbpv,temp1v,wtv;
|
|
__m128 wtsev, wtnwv, wtnev, wtswv, rbvarmv;
|
|
__m128 gausseven0v = _mm_set1_ps(gausseven[0]);
|
|
__m128 gausseven1v = _mm_set1_ps(gausseven[1]);
|
|
__m128 twov = _mm_set1_ps(2.0f);
|
|
#endif
|
|
for (rr=8; rr<rr1-8; rr++) {
|
|
#ifdef __SSE2__
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-8; cc+=8,indx+=8,indx1+=4) {
|
|
|
|
//diagonal color ratios
|
|
cfav = LC2VFU(cfa[indx]);
|
|
|
|
temp1v = LC2VFU(cfa[indx+m1]);
|
|
temp2v = LC2VFU(cfa[indx+m2]);
|
|
rbsev = (temp1v + temp1v) / (epsv + cfav + temp2v );
|
|
rbsev = vself(vmaskf_lt(vabsf(onev - rbsev), arthreshv), cfav * rbsev, temp1v + zd5v * (cfav - temp2v));
|
|
|
|
temp1v = LC2VFU(cfa[indx-m1]);
|
|
temp2v = LC2VFU(cfa[indx-m2]);
|
|
rbnwv = (temp1v + temp1v) / (epsv + cfav + temp2v );
|
|
rbnwv = vself(vmaskf_lt(vabsf(onev - rbnwv), arthreshv), cfav * rbnwv, temp1v + zd5v * (cfav - temp2v));
|
|
|
|
temp1v = epsv + LVFU(delm[indx1]);
|
|
wtsev= temp1v+LVFU(delm[(indx+m1)>>1])+LVFU(delm[(indx+m2)>>1]);//same as for wtu,wtd,wtl,wtr
|
|
wtnwv= temp1v+LVFU(delm[(indx-m1)>>1])+LVFU(delm[(indx-m2)>>1]);
|
|
|
|
rbmv = (wtsev*rbnwv+wtnwv*rbsev)/(wtsev+wtnwv);
|
|
|
|
temp1v = ULIMV(rbmv ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1]));
|
|
wtv = twov * (cfav-rbmv)/(epsv+rbmv+cfav);
|
|
temp2v = wtv * rbmv + (onev-wtv)*temp1v;
|
|
|
|
temp2v = vself(vmaskf_lt(rbmv + rbmv, cfav), temp1v, temp2v);
|
|
temp2v = vself(vmaskf_lt(rbmv, cfav), temp2v, rbmv);
|
|
_mm_storeu_ps(&rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-m1]),LC2VFU(cfa[indx+m1])), temp2v ));
|
|
|
|
|
|
temp1v = LC2VFU(cfa[indx+p1]);
|
|
temp2v = LC2VFU(cfa[indx+p2]);
|
|
rbnev = (temp1v + temp1v) / (epsv + cfav + temp2v );
|
|
rbnev = vself(vmaskf_lt(vabsf(onev - rbnev), arthreshv), cfav * rbnev, temp1v + zd5v * (cfav - temp2v));
|
|
|
|
temp1v = LC2VFU(cfa[indx-p1]);
|
|
temp2v = LC2VFU(cfa[indx-p2]);
|
|
rbswv = (temp1v + temp1v) / (epsv + cfav + temp2v );
|
|
rbswv = vself(vmaskf_lt(vabsf(onev - rbswv), arthreshv), cfav * rbswv, temp1v + zd5v * (cfav - temp2v));
|
|
|
|
temp1v = epsv + LVFU(delp[indx1]);
|
|
wtnev= temp1v+LVFU(delp[(indx+p1)>>1])+LVFU(delp[(indx+p2)>>1]);
|
|
wtswv= temp1v+LVFU(delp[(indx-p1)>>1])+LVFU(delp[(indx-p2)>>1]);
|
|
|
|
rbpv = (wtnev*rbswv+wtswv*rbnev)/(wtnev+wtswv);
|
|
|
|
temp1v = ULIMV(rbpv ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1]));
|
|
wtv = twov * (cfav-rbpv)/(epsv+rbpv+cfav);
|
|
temp2v = wtv * rbpv + (onev-wtv)*temp1v;
|
|
|
|
temp2v = vself(vmaskf_lt(rbpv + rbpv, cfav), temp1v, temp2v);
|
|
temp2v = vself(vmaskf_lt(rbpv, cfav), temp2v, rbpv);
|
|
_mm_storeu_ps(&rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v ,LC2VFU(cfa[indx-p1]),LC2VFU(cfa[indx+p1])), temp2v ));
|
|
|
|
|
|
|
|
rbvarmv = epssqv + (gausseven0v*(LVFU(Dgrbsq1m[(indx-v1)>>1])+LVFU(Dgrbsq1m[(indx-1)>>1])+LVFU(Dgrbsq1m[(indx+1)>>1])+LVFU(Dgrbsq1m[(indx+v1)>>1])) +
|
|
gausseven1v*(LVFU(Dgrbsq1m[(indx-v2-1)>>1])+LVFU(Dgrbsq1m[(indx-v2+1)>>1])+LVFU(Dgrbsq1m[(indx-2-v1)>>1])+LVFU(Dgrbsq1m[(indx+2-v1)>>1])+
|
|
LVFU(Dgrbsq1m[(indx-2+v1)>>1])+LVFU(Dgrbsq1m[(indx+2+v1)>>1])+LVFU(Dgrbsq1m[(indx+v2-1)>>1])+LVFU(Dgrbsq1m[(indx+v2+1)>>1])));
|
|
_mm_storeu_ps(&pmwt[indx1] , rbvarmv/((epssqv + (gausseven0v*(LVFU(Dgrbsq1p[(indx-v1)>>1])+LVFU(Dgrbsq1p[(indx-1)>>1])+LVFU(Dgrbsq1p[(indx+1)>>1])+LVFU(Dgrbsq1p[(indx+v1)>>1])) +
|
|
gausseven1v*(LVFU(Dgrbsq1p[(indx-v2-1)>>1])+LVFU(Dgrbsq1p[(indx-v2+1)>>1])+LVFU(Dgrbsq1p[(indx-2-v1)>>1])+LVFU(Dgrbsq1p[(indx+2-v1)>>1])+
|
|
LVFU(Dgrbsq1p[(indx-2+v1)>>1])+LVFU(Dgrbsq1p[(indx+2+v1)>>1])+LVFU(Dgrbsq1p[(indx+v2-1)>>1])+LVFU(Dgrbsq1p[(indx+v2+1)>>1]))))+rbvarmv));
|
|
|
|
}
|
|
|
|
#else
|
|
for (cc=8+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-8; cc+=2,indx+=2,indx1++) {
|
|
|
|
//diagonal color ratios
|
|
crse=xmul2f(cfa[indx+m1])/(eps+cfa[indx]+(cfa[indx+m2]));
|
|
crnw=xmul2f(cfa[indx-m1])/(eps+cfa[indx]+(cfa[indx-m2]));
|
|
crne=xmul2f(cfa[indx+p1])/(eps+cfa[indx]+(cfa[indx+p2]));
|
|
crsw=xmul2f(cfa[indx-p1])/(eps+cfa[indx]+(cfa[indx-p2]));
|
|
|
|
//assign B/R at R/B sites
|
|
if (fabsf(1.0f-crse)<arthresh)
|
|
rbse=cfa[indx]*crse;//use this if more precise diag interp is necessary
|
|
else
|
|
rbse=(cfa[indx+m1])+xdiv2f(cfa[indx]-cfa[indx+m2]);
|
|
if (fabsf(1.0f-crnw)<arthresh)
|
|
rbnw=cfa[indx]*crnw;
|
|
else
|
|
rbnw=(cfa[indx-m1])+xdiv2f(cfa[indx]-cfa[indx-m2]);
|
|
if (fabsf(1.0f-crne)<arthresh)
|
|
rbne=cfa[indx]*crne;
|
|
else
|
|
rbne=(cfa[indx+p1])+xdiv2f(cfa[indx]-cfa[indx+p2]);
|
|
if (fabsf(1.0f-crsw)<arthresh)
|
|
rbsw=cfa[indx]*crsw;
|
|
else
|
|
rbsw=(cfa[indx-p1])+xdiv2f(cfa[indx]-cfa[indx-p2]);
|
|
|
|
wtse= eps+delm[indx1]+delm[(indx+m1)>>1]+delm[(indx+m2)>>1];//same as for wtu,wtd,wtl,wtr
|
|
wtnw= eps+delm[indx1]+delm[(indx-m1)>>1]+delm[(indx-m2)>>1];
|
|
wtne= eps+delp[indx1]+delp[(indx+p1)>>1]+delp[(indx+p2)>>1];
|
|
wtsw= eps+delp[indx1]+delp[(indx-p1)>>1]+delp[(indx-p2)>>1];
|
|
|
|
|
|
rbm[indx1] = (wtse*rbnw+wtnw*rbse)/(wtse+wtnw);
|
|
rbp[indx1] = (wtne*rbsw+wtsw*rbne)/(wtne+wtsw);
|
|
/*
|
|
rbvarp = epssq + (gausseven[0]*(Dgrbsq1[indx-v1].p+Dgrbsq1[indx-1].p+Dgrbsq1[indx+1].p+Dgrbsq1[indx+v1].p) +
|
|
gausseven[1]*(Dgrbsq1[indx-v2-1].p+Dgrbsq1[indx-v2+1].p+Dgrbsq1[indx-2-v1].p+Dgrbsq1[indx+2-v1].p+
|
|
Dgrbsq1[indx-2+v1].p+Dgrbsq1[indx+2+v1].p+Dgrbsq1[indx+v2-1].p+Dgrbsq1[indx+v2+1].p));
|
|
*/
|
|
rbvarm = epssq + (gausseven[0]*(Dgrbsq1m[(indx-v1)>>1]+Dgrbsq1m[(indx-1)>>1]+Dgrbsq1m[(indx+1)>>1]+Dgrbsq1m[(indx+v1)>>1]) +
|
|
gausseven[1]*(Dgrbsq1m[(indx-v2-1)>>1]+Dgrbsq1m[(indx-v2+1)>>1]+Dgrbsq1m[(indx-2-v1)>>1]+Dgrbsq1m[(indx+2-v1)>>1]+
|
|
Dgrbsq1m[(indx-2+v1)>>1]+Dgrbsq1m[(indx+2+v1)>>1]+Dgrbsq1m[(indx+v2-1)>>1]+Dgrbsq1m[(indx+v2+1)>>1]));
|
|
pmwt[indx1] = rbvarm/((epssq + (gausseven[0]*(Dgrbsq1p[(indx-v1)>>1]+Dgrbsq1p[(indx-1)>>1]+Dgrbsq1p[(indx+1)>>1]+Dgrbsq1p[(indx+v1)>>1]) +
|
|
gausseven[1]*(Dgrbsq1p[(indx-v2-1)>>1]+Dgrbsq1p[(indx-v2+1)>>1]+Dgrbsq1p[(indx-2-v1)>>1]+Dgrbsq1p[(indx+2-v1)>>1]+
|
|
Dgrbsq1p[(indx-2+v1)>>1]+Dgrbsq1p[(indx+2+v1)>>1]+Dgrbsq1p[(indx+v2-1)>>1]+Dgrbsq1p[(indx+v2+1)>>1])))+rbvarm);
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
//bound the interpolation in regions of high saturation
|
|
if (rbp[indx1]<cfa[indx]) {
|
|
if (xmul2f(rbp[indx1]) < cfa[indx]) {
|
|
rbp[indx1] = ULIM(rbp[indx1] ,cfa[indx-p1],cfa[indx+p1]);
|
|
} else {
|
|
pwt = xmul2f(cfa[indx]-rbp[indx1])/(eps+rbp[indx1]+cfa[indx]);
|
|
rbp[indx1]=pwt*rbp[indx1] + (1.0f-pwt)*ULIM(rbp[indx1],cfa[indx-p1],cfa[indx+p1]);
|
|
}
|
|
}
|
|
if (rbm[indx1]<cfa[indx]) {
|
|
if (xmul2f(rbm[indx1]) < cfa[indx]) {
|
|
rbm[indx1] = ULIM(rbm[indx1] ,cfa[indx-m1],cfa[indx+m1]);
|
|
} else {
|
|
mwt = xmul2f(cfa[indx]-rbm[indx1])/(eps+rbm[indx1]+cfa[indx]);
|
|
rbm[indx1]=mwt*rbm[indx1] + (1.0f-mwt)*ULIM(rbm[indx1],cfa[indx-m1],cfa[indx+m1]);
|
|
}
|
|
}
|
|
|
|
if (rbp[indx1] > clip_pt) rbp[indx1]=ULIM(rbp[indx1],cfa[indx-p1],cfa[indx+p1]);//for RT implementation
|
|
if (rbm[indx1] > clip_pt) rbm[indx1]=ULIM(rbm[indx1],cfa[indx-m1],cfa[indx+m1]);
|
|
//c=2-FC(rr,cc);//for dcraw implementation
|
|
//if (rbp[indx] > pre_mul[c]) rbp[indx]=ULIM(rbp[indx],cfa[indx-p1],cfa[indx+p1]);
|
|
//if (rbm[indx] > pre_mul[c]) rbm[indx]=ULIM(rbm[indx],cfa[indx-m1],cfa[indx+m1]);
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
//rbint[indx] = 0.5*(cfa[indx] + (rbp*rbvarm+rbm*rbvarp)/(rbvarp+rbvarm));//this is R+B, interpolated
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#ifdef __SSE2__
|
|
__m128 pmwtaltv;
|
|
__m128 zd25v = _mm_set1_ps(0.25f);
|
|
#endif
|
|
for (rr=10; rr<rr1-10; rr++)
|
|
#ifdef __SSE2__
|
|
for (cc=10+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-10; cc+=8,indx+=8,indx1+=4) {
|
|
|
|
//first ask if one gets more directional discrimination from nearby B/R sites
|
|
pmwtaltv = zd25v*(LVFU(pmwt[(indx-m1)>>1])+LVFU(pmwt[(indx+p1)>>1])+LVFU(pmwt[(indx-p1)>>1])+LVFU(pmwt[(indx+m1)>>1]));
|
|
tempv = LVFU(pmwt[indx1]);
|
|
tempv = vself(vmaskf_lt(vabsf(zd5v-tempv), vabsf(zd5v-pmwtaltv)), pmwtaltv, tempv);
|
|
_mm_storeu_ps( &pmwt[indx1], tempv);
|
|
_mm_storeu_ps( &rbint[indx1], zd5v * (LC2VFU(cfa[indx]) + LVFU(rbm[indx1]) * (onev - tempv) + LVFU(rbp[indx1]) * tempv));
|
|
}
|
|
|
|
#else
|
|
for (cc=10+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-10; cc+=2,indx+=2,indx1++) {
|
|
|
|
//first ask if one gets more directional discrimination from nearby B/R sites
|
|
pmwtalt = xdivf(pmwt[(indx-m1)>>1]+pmwt[(indx+p1)>>1]+pmwt[(indx-p1)>>1]+pmwt[(indx+m1)>>1],2);
|
|
if (fabsf(0.5-pmwt[indx1])<fabsf(0.5-pmwtalt)) {pmwt[indx1]=pmwtalt;}//a better result was obtained from the neighbors
|
|
|
|
rbint[indx1] = xdiv2f(cfa[indx] + rbm[indx1]*(1.0f-pmwt[indx1]) + rbp[indx1]*pmwt[indx1]);//this is R+B, interpolated
|
|
}
|
|
#endif
|
|
|
|
for (rr=12; rr<rr1-12; rr++)
|
|
for (cc=12+(FC(rr,2)&1),indx=rr*TS+cc,indx1=indx>>1; cc<cc1-12; cc+=2,indx+=2,indx1++) {
|
|
|
|
if (fabsf(0.5-pmwt[indx>>1])<fabsf(0.5-hvwt[indx>>1]) )
|
|
continue;
|
|
|
|
//now interpolate G vertically/horizontally using R+B values
|
|
//unfortunately, since G interpolation cannot be done diagonally this may lead to color shifts
|
|
//color ratios for G interpolation
|
|
|
|
cru = cfa[indx-v1]*2.0/(eps+rbint[indx1]+rbint[(indx1-v1)]);
|
|
crd = cfa[indx+v1]*2.0/(eps+rbint[indx1]+rbint[(indx1+v1)]);
|
|
crl = cfa[indx-1]*2.0/(eps+rbint[indx1]+rbint[(indx1-1)]);
|
|
crr = cfa[indx+1]*2.0/(eps+rbint[indx1]+rbint[(indx1+1)]);
|
|
|
|
//interpolated G via adaptive ratios or Hamilton-Adams in each cardinal direction
|
|
if (fabsf(1.0f-cru)<arthresh) {gu=rbint[indx1]*cru;}
|
|
else {gu=cfa[indx-v1]+xdiv2f(rbint[indx1]-rbint[(indx1-v1)]);}
|
|
if (fabsf(1.0f-crd)<arthresh) {gd=rbint[indx1]*crd;}
|
|
else {gd=cfa[indx+v1]+xdiv2f(rbint[indx1]-rbint[(indx1+v1)]);}
|
|
if (fabsf(1.0f-crl)<arthresh) {gl=rbint[indx1]*crl;}
|
|
else {gl=cfa[indx-1]+xdiv2f(rbint[indx1]-rbint[(indx1-1)]);}
|
|
if (fabsf(1.0f-crr)<arthresh) {gr=rbint[indx1]*crr;}
|
|
else {gr=cfa[indx+1]+xdiv2f(rbint[indx1]-rbint[(indx1+1)]);}
|
|
|
|
//gu=rbint[indx]*cru;
|
|
//gd=rbint[indx]*crd;
|
|
//gl=rbint[indx]*crl;
|
|
//gr=rbint[indx]*crr;
|
|
|
|
//interpolated G via adaptive weights of cardinal evaluations
|
|
Gintv = (dirwts0[indx-v1]*gd+dirwts0[indx+v1]*gu)/(dirwts0[indx+v1]+dirwts0[indx-v1]);
|
|
Ginth = (dirwts1[indx-1]*gr+dirwts1[indx+1]*gl)/(dirwts1[indx-1]+dirwts1[indx+1]);
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
//bound the interpolation in regions of high saturation
|
|
if (Gintv<rbint[indx1]) {
|
|
if (2*Gintv < rbint[indx1]) {
|
|
Gintv = ULIM(Gintv ,cfa[indx-v1],cfa[indx+v1]);
|
|
} else {
|
|
vwt = 2.0*(rbint[indx1]-Gintv)/(eps+Gintv+rbint[indx1]);
|
|
Gintv=vwt*Gintv + (1.0f-vwt)*ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
|
|
}
|
|
}
|
|
if (Ginth<rbint[indx1]) {
|
|
if (2*Ginth < rbint[indx1]) {
|
|
Ginth = ULIM(Ginth ,cfa[indx-1],cfa[indx+1]);
|
|
} else {
|
|
hwt = 2.0*(rbint[indx1]-Ginth)/(eps+Ginth+rbint[indx1]);
|
|
Ginth=hwt*Ginth + (1.0f-hwt)*ULIM(Ginth,cfa[indx-1],cfa[indx+1]);
|
|
}
|
|
}
|
|
|
|
if (Ginth > clip_pt) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]);//for RT implementation
|
|
if (Gintv > clip_pt) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
|
|
//c=FC(rr,cc);//for dcraw implementation
|
|
//if (Ginth > pre_mul[c]) Ginth=ULIM(Ginth,cfa[indx-1],cfa[indx+1]);
|
|
//if (Gintv > pre_mul[c]) Gintv=ULIM(Gintv,cfa[indx-v1],cfa[indx+v1]);
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
rgbgreen[indx] = Ginth*(1.0f-hvwt[indx1]) + Gintv*hvwt[indx1];
|
|
//rgb[indx][1] = 0.5*(rgb[indx][1]+0.25*(rgb[indx-v1][1]+rgb[indx+v1][1]+rgb[indx-1][1]+rgb[indx+1][1]));
|
|
Dgrb[0][indx>>1] = rgbgreen[indx]-cfa[indx];
|
|
|
|
//rgb[indx][2-FC(rr,cc)]=2*rbint[indx]-cfa[indx];
|
|
}
|
|
//end of diagonal interpolation correction
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
//fancy chrominance interpolation
|
|
//(ey,ex) is location of R site
|
|
for (rr=13-ey; rr<rr1-12; rr+=2)
|
|
for (cc=13-ex,indx1=(rr*TS+cc)>>1; cc<cc1-12; cc+=2,indx1++) {//B coset
|
|
Dgrb[1][indx1]=Dgrb[0][indx1];//split out G-B from G-R
|
|
Dgrb[0][indx1]=0;
|
|
}
|
|
#ifdef __SSE2__
|
|
// __m128 wtnwv,wtnev,wtswv,wtsev;
|
|
__m128 oned325v = _mm_set1_ps( 1.325f );
|
|
__m128 zd175v = _mm_set1_ps( 0.175f );
|
|
__m128 zd075v = _mm_set1_ps( 0.075f );
|
|
#endif
|
|
for (rr=14; rr<rr1-14; rr++)
|
|
#ifdef __SSE2__
|
|
for (cc=14+(FC(rr,2)&1),indx=rr*TS+cc,c=1-FC(rr,cc)/2; cc<cc1-14; cc+=8,indx+=8) {
|
|
wtnwv=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx+m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m3)>>1])));
|
|
wtnev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p3)>>1])));
|
|
wtswv=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+p1)>>1]))+vabsf(LVFU(Dgrb[c][(indx-p1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1]))+vabsf(LVFU(Dgrb[c][(indx+p1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1])));
|
|
wtsev=onev/(epsv+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-m1)>>1]))+vabsf(LVFU(Dgrb[c][(indx+m1)>>1])-LVFU(Dgrb[c][(indx-p3)>>1]))+vabsf(LVFU(Dgrb[c][(indx-m1)>>1])-LVFU(Dgrb[c][(indx+m3)>>1])));
|
|
|
|
//Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse);
|
|
|
|
_mm_storeu_ps(&Dgrb[c][indx>>1], (wtnwv*(oned325v*LVFU(Dgrb[c][(indx-m1)>>1])-zd175v*LVFU(Dgrb[c][(indx-m3)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-m1-v2)>>1]) )+
|
|
wtnev*(oned325v*LVFU(Dgrb[c][(indx+p1)>>1])-zd175v*LVFU(Dgrb[c][(indx+p3)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+p1+v2)>>1]) )+
|
|
wtswv*(oned325v*LVFU(Dgrb[c][(indx-p1)>>1])-zd175v*LVFU(Dgrb[c][(indx-p3)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-2)>>1])-zd075v*LVFU(Dgrb[c][(indx-p1-v2)>>1]) )+
|
|
wtsev*(oned325v*LVFU(Dgrb[c][(indx+m1)>>1])-zd175v*LVFU(Dgrb[c][(indx+m3)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+2)>>1])-zd075v*LVFU(Dgrb[c][(indx+m1+v2)>>1]) ))/(wtnwv+wtnev+wtswv+wtsev));
|
|
}
|
|
|
|
#else
|
|
for (cc=14+(FC(rr,2)&1),indx=rr*TS+cc,c=1-FC(rr,cc)/2; cc<cc1-14; cc+=2,indx+=2) {
|
|
wtnw=1.0f/(eps+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx+m1)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx-m3)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m3)>>1]));
|
|
wtne=1.0f/(eps+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p1)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx+p3)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p3)>>1]));
|
|
wtsw=1.0f/(eps+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+p1)>>1])+fabsf(Dgrb[c][(indx-p1)>>1]-Dgrb[c][(indx+m3)>>1])+fabsf(Dgrb[c][(indx+p1)>>1]-Dgrb[c][(indx-p3)>>1]));
|
|
wtse=1.0f/(eps+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-m1)>>1])+fabsf(Dgrb[c][(indx+m1)>>1]-Dgrb[c][(indx-p3)>>1])+fabsf(Dgrb[c][(indx-m1)>>1]-Dgrb[c][(indx+m3)>>1]));
|
|
|
|
//Dgrb[indx][c]=(wtnw*Dgrb[indx-m1][c]+wtne*Dgrb[indx+p1][c]+wtsw*Dgrb[indx-p1][c]+wtse*Dgrb[indx+m1][c])/(wtnw+wtne+wtsw+wtse);
|
|
|
|
Dgrb[c][indx>>1]=(wtnw*(1.325f*Dgrb[c][(indx-m1)>>1]-0.175f*Dgrb[c][(indx-m3)>>1]-0.075f*Dgrb[c][(indx-m1-2)>>1]-0.075f*Dgrb[c][(indx-m1-v2)>>1] )+
|
|
wtne*(1.325f*Dgrb[c][(indx+p1)>>1]-0.175f*Dgrb[c][(indx+p3)>>1]-0.075f*Dgrb[c][(indx+p1+2)>>1]-0.075f*Dgrb[c][(indx+p1+v2)>>1] )+
|
|
wtsw*(1.325f*Dgrb[c][(indx-p1)>>1]-0.175f*Dgrb[c][(indx-p3)>>1]-0.075f*Dgrb[c][(indx-p1-2)>>1]-0.075f*Dgrb[c][(indx-p1-v2)>>1] )+
|
|
wtse*(1.325f*Dgrb[c][(indx+m1)>>1]-0.175f*Dgrb[c][(indx+m3)>>1]-0.075f*Dgrb[c][(indx+m1+2)>>1]-0.075f*Dgrb[c][(indx+m1+v2)>>1] ))/(wtnw+wtne+wtsw+wtse);
|
|
}
|
|
#endif
|
|
float temp;
|
|
for (rr=16; rr<rr1-16; rr++) {
|
|
if((FC(rr,2)&1)==1) {
|
|
for (cc=16,indx=rr*TS+cc,row=rr+top; cc<cc1-16-(cc1&1); cc+=2,indx++) {
|
|
col = cc + left;
|
|
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
|
|
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
|
|
temp);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
|
|
temp);
|
|
|
|
indx++;
|
|
col++;
|
|
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
|
|
}
|
|
if(cc1&1) { // width of tile is odd
|
|
col = cc + left;
|
|
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
|
|
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
|
|
temp);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
|
|
temp);
|
|
}
|
|
}
|
|
else {
|
|
for (cc=16,indx=rr*TS+cc,row=rr+top; cc<cc1-16-(cc1&1); cc+=2,indx++) {
|
|
col = cc + left;
|
|
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
|
|
|
|
indx++;
|
|
col++;
|
|
temp = 1.0f/((hvwt[(indx-v1)>>1])+(1.0f-hvwt[(indx+1)>>1])+(1.0f-hvwt[(indx-1)>>1])+(hvwt[(indx+v1)>>1]));
|
|
red[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[0][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[0][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[0][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[0][(indx+v1)>>1])*
|
|
temp);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]- ((hvwt[(indx-v1)>>1])*Dgrb[1][(indx-v1)>>1]+(1.0f-hvwt[(indx+1)>>1])*Dgrb[1][(indx+1)>>1]+(1.0f-hvwt[(indx-1)>>1])*Dgrb[1][(indx-1)>>1]+(hvwt[(indx+v1)>>1])*Dgrb[1][(indx+v1)>>1])*
|
|
temp);
|
|
}
|
|
if(cc1&1) { // width of tile is odd
|
|
col = cc + left;
|
|
red[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[0][indx>>1]);
|
|
blue[row][col]=65535.0f*(rgbgreen[indx]-Dgrb[1][indx>>1]);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
// copy smoothed results back to image matrix
|
|
for (rr=16; rr < rr1-16; rr++){
|
|
#ifdef __SSE2__
|
|
for (row=rr+top, cc=16; cc < cc1-19; cc+=4) {
|
|
_mm_storeu_ps(&green[row][cc + left], LVF(rgbgreen[rr*TS+cc]) * c65535v);
|
|
}
|
|
#else
|
|
for (row=rr+top, cc=16; cc < cc1-16; cc++) {
|
|
col = cc + left;
|
|
indx=rr*TS+cc;
|
|
green[row][col] = ((65535.0f*rgbgreen[indx]));
|
|
|
|
//for dcraw implementation
|
|
//for (c=0; c<3; c++){
|
|
// image[indx][c] = CLIP((int)(65535.0f*rgb[rr*TS+cc][c] + 0.5f));
|
|
//}
|
|
}
|
|
#endif
|
|
}
|
|
//end of main loop
|
|
|
|
if(plistener) {
|
|
progresscounter++;
|
|
if(progresscounter % 4 == 0) {
|
|
#pragma omp critical
|
|
{
|
|
progress+=(double)4*((TS-32)*(TS-32))/(height*width);
|
|
if (progress>1.0)
|
|
{
|
|
progress=1.0;
|
|
}
|
|
plistener->setProgress(progress);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
|
|
|
// clean up
|
|
free(buffer);
|
|
}
|
|
if(plistener)
|
|
plistener->setProgress(1.0);
|
|
|
|
|
|
// done
|
|
|
|
#undef TS
|
|
|
|
}
|
|
} |