diff --git a/rtengine/green_equil_RT.cc b/rtengine/green_equil_RT.cc index 8b1136359..91093550d 100644 --- a/rtengine/green_equil_RT.cc +++ b/rtengine/green_equil_RT.cc @@ -3,9 +3,10 @@ // Green Equilibration via directional average // // copyright (c) 2008-2010 Emil Martinec +// optimized for speed 2017 Ingo Weyrich // // -// code dated: February 12, 2011 +// code dated: August 25, 2017 // // green_equil_RT.cc is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -21,18 +22,62 @@ // along with this program. If not, see . // //////////////////////////////////////////////////////////////// -#define TS 256 // Tile size #include #include #include - #include "rt_math.h" #include "rawimagesource.h" +#include "opthelper.h" + namespace rtengine { +void RawImageSource::green_equilibrate_global(array2D &rawData) +{ + // global correction + int ng1 = 0, ng2 = 0; + double avgg1 = 0., avgg2 = 0.; + +#ifdef _OPENMP + #pragma omp parallel for reduction(+: ng1, ng2, avgg1, avgg2) schedule(dynamic,16) +#endif + + for (int i = border; i < H - border; i++) { + double avgg = 0.; + + for (int j = border + ((FC(i, border) & 1) ^ 1); j < W - border; j += 2) { + avgg += rawData[i][j]; + } + + int ng = (W - 2 * border + (FC(i, border) & 1)) / 2; + + if (i & 1) { + avgg2 += avgg; + ng2 += ng; + } else { + avgg1 += avgg; + ng1 += ng; + } + } + + double corrg1 = (avgg1 / ng1 + avgg2 / ng2) / 2.0 / (avgg1 / ng1); + double corrg2 = (avgg1 / ng1 + avgg2 / ng2) / 2.0 / (avgg2 / ng2); + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif + + for (int i = border; i < H - border; i++) { + double corrg = (i & 1) ? corrg2 : corrg1; + + for (int j = border + ((FC(i, border) & 1) ^ 1); j < W - border; j += 2) { + rawData[i][j] *= corrg; + } + } +} + //void green_equilibrate()//for dcraw implementation void RawImageSource::green_equilibrate(float thresh, array2D &rawData) { @@ -42,15 +87,29 @@ void RawImageSource::green_equilibrate(float thresh, array2D &rawData) int height = H, width = W; // local variables - float** rawptr = rawData; - array2D cfa (width, height, rawptr); - //array2D checker (width,height,ARRAY2D_CLEAR_DATA); + array2D cfa(width / 2 + (width & 1), height); +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif - //int verbose=1; + for (int i = 0; i < height; ++i) { + int j = (FC(i, 0) & 1) ^ 1; +#ifdef __SSE2__ - static const float eps = 1.0; //tolerance to avoid dividing by zero + for (; j < width - 7; j += 8) { + STVFU(cfa[i][j >> 1], LC2VFU(rawData[i][j])); + } +#endif + + for (; j < width; j += 2) { + cfa[i][j >> 1] = rawData[i][j]; + } + } + + constexpr float eps = 1.f; //tolerance to avoid dividing by zero + const float thresh6 = 6 * thresh; // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Fill G interpolated values with border interpolation and input values @@ -59,94 +118,118 @@ void RawImageSource::green_equilibrate(float thresh, array2D &rawData) //int counter, vtest; //The green equilibration algorithm starts here - /* - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int rr=1; rr < height-1; rr++) - for (int cc=3-(FC(rr,2)&1); cc < width-2; cc+=2) { - - float pcorr = (cfa[rr+1][cc+1]-cfa[rr][cc])*(cfa[rr-1][cc-1]-cfa[rr][cc]); - float mcorr = (cfa[rr-1][cc+1]-cfa[rr][cc])*(cfa[rr+1][cc-1]-cfa[rr][cc]); - - if (pcorr>0 && mcorr>0) {checker[rr][cc]=1;} else {checker[rr][cc]=0;} - - checker[rr][cc]=1;//test what happens if we always interpolate - } - - counter=vtest=0; - */ //now smooth the cfa data #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) + #pragma omp parallel +#endif + { +#ifdef __SSE2__ + vfloat zd5v = F2V(0.5f); + vfloat onev = F2V(1.f); + vfloat threshv = F2V(thresh); + vfloat thresh6v = F2V(thresh6); + vfloat epsv = F2V(eps); +#endif +#ifdef _OPENMP + #pragma omp for schedule(dynamic,16) #endif - for (int rr = 4; rr < height - 4; rr++) - for (int cc = 5 - (FC(rr, 2) & 1); cc < width - 6; cc += 2) { - //if (checker[rr][cc]) { - //%%%%%%%%%%%%%%%%%%%%%% - //neighbor checking code from Manuel Llorens Garcia - float o1_1 = cfa[(rr - 1)][cc - 1]; - float o1_2 = cfa[(rr - 1)][cc + 1]; - float o1_3 = cfa[(rr + 1)][cc - 1]; - float o1_4 = cfa[(rr + 1)][cc + 1]; - float o2_1 = cfa[(rr - 2)][cc]; - float o2_2 = cfa[(rr + 2)][cc]; - float o2_3 = cfa[(rr)][cc - 2]; - float o2_4 = cfa[(rr)][cc + 2]; + for (int rr = 4; rr < height - 4; rr++) { + int cc = 5 - (FC(rr, 2) & 1); +#ifdef __SSE2__ - float d1 = (o1_1 + o1_2 + o1_3 + o1_4) * 0.25f; - float d2 = (o2_1 + o2_2 + o2_3 + o2_4) * 0.25f; + for (; cc < width - 12; cc += 8) { + //neighbour checking code from Manuel Llorens Garcia + vfloat o1_1 = LVFU(cfa[rr - 1][(cc - 1) >> 1]); + vfloat o1_2 = LVFU(cfa[rr - 1][(cc + 1) >> 1]); + vfloat o1_3 = LVFU(cfa[rr + 1][(cc - 1) >> 1]); + vfloat o1_4 = LVFU(cfa[rr + 1][(cc + 1) >> 1]); + vfloat o2_1 = LVFU(cfa[rr - 2][cc >> 1]); + vfloat o2_2 = LVFU(cfa[rr + 2][cc >> 1]); + vfloat o2_3 = LVFU(cfa[rr][(cc >> 1) - 1]); + vfloat o2_4 = LVFU(cfa[rr][(cc >> 1) + 1]); - float c1 = (fabs(o1_1 - o1_2) + fabs(o1_1 - o1_3) + fabs(o1_1 - o1_4) + fabs(o1_2 - o1_3) + fabs(o1_3 - o1_4) + fabs(o1_2 - o1_4)) / 6.f; - float c2 = (fabs(o2_1 - o2_2) + fabs(o2_1 - o2_3) + fabs(o2_1 - o2_4) + fabs(o2_2 - o2_3) + fabs(o2_3 - o2_4) + fabs(o2_2 - o2_4)) / 6.f; - //%%%%%%%%%%%%%%%%%%%%%% + vfloat d1 = (o1_1 + o1_2 + o1_3 + o1_4); + vfloat d2 = (o2_1 + o2_2 + o2_3 + o2_4); - //vote1=(checker[rr-2][cc]+checker[rr][cc-2]+checker[rr][cc+2]+checker[rr+2][cc]); - //vote2=(checker[rr+1][cc-1]+checker[rr+1][cc+1]+checker[rr-1][cc-1]+checker[rr-1][cc+1]); - //if ((vote1==0 || vote2==0) && (c1+c2)<2*thresh*fabs(d1-d2)) vtest++; - //if (vote1>0 && vote2>0 && (c1+c2)<4*thresh*fabs(d1-d2)) { - if ((c1 + c2) < 4 * thresh * fabs(d1 - d2)) { - //pixel interpolation - float gin = cfa[rr][cc]; + vfloat c1 = (vabsf(o1_1 - o1_2) + vabsf(o1_1 - o1_3) + vabsf(o1_1 - o1_4) + vabsf(o1_2 - o1_3) + vabsf(o1_3 - o1_4) + vabsf(o1_2 - o1_4)); + vfloat c2 = (vabsf(o2_1 - o2_2) + vabsf(o2_1 - o2_3) + vabsf(o2_1 - o2_4) + vabsf(o2_2 - o2_3) + vabsf(o2_3 - o2_4) + vabsf(o2_2 - o2_4)); - float gse = (cfa[rr + 1][cc + 1]) + 0.5f * (cfa[rr][cc] - cfa[rr + 2][cc + 2]); - float gnw = (cfa[rr - 1][cc - 1]) + 0.5f * (cfa[rr][cc] - cfa[rr - 2][cc - 2]); - float gne = (cfa[rr - 1][cc + 1]) + 0.5f * (cfa[rr][cc] - cfa[rr - 2][cc + 2]); - float gsw = (cfa[rr + 1][cc - 1]) + 0.5f * (cfa[rr][cc] - cfa[rr + 2][cc - 2]); + vmask mask1 = vmaskf_lt(c1 + c2, thresh6v * vabsf(d1 - d2)); + if (_mm_movemask_ps((vfloat)mask1)) { // if for any of the 4 pixels the condition is true, do the maths for all 4 pixels and mask the unused out at the end + //pixel interpolation + vfloat gin = LVFU(cfa[rr][cc >> 1]); + vfloat gmp2p2 = gin - LVFU(cfa[rr + 2][(cc >> 1) + 1]); + vfloat gmm2m2 = gin - LVFU(cfa[rr - 2][(cc >> 1) - 1]); + vfloat gmm2p2 = gin - LVFU(cfa[rr - 2][(cc >> 1) + 1]); + vfloat gmp2m2 = gin - LVFU(cfa[rr + 2][(cc >> 1) - 1]); - float wtse = 1.0f / (eps + SQR(cfa[rr + 2][cc + 2] - cfa[rr][cc]) + SQR(cfa[rr + 3][cc + 3] - cfa[rr + 1][cc + 1])); - float wtnw = 1.0f / (eps + SQR(cfa[rr - 2][cc - 2] - cfa[rr][cc]) + SQR(cfa[rr - 3][cc - 3] - cfa[rr - 1][cc - 1])); - float wtne = 1.0f / (eps + SQR(cfa[rr - 2][cc + 2] - cfa[rr][cc]) + SQR(cfa[rr - 3][cc + 3] - cfa[rr - 1][cc + 1])); - float wtsw = 1.0f / (eps + SQR(cfa[rr + 2][cc - 2] - cfa[rr][cc]) + SQR(cfa[rr + 3][cc - 3] - cfa[rr + 1][cc - 1])); + vfloat gse = o1_4 + zd5v * gmp2p2; + vfloat gnw = o1_1 + zd5v * gmm2m2; + vfloat gne = o1_2 + zd5v * gmm2p2; + vfloat gsw = o1_3 + zd5v * gmp2m2; - float ginterp = (gse * wtse + gnw * wtnw + gne * wtne + gsw * wtsw) / (wtse + wtnw + wtne + wtsw); + vfloat wtse = onev / (epsv + SQRV(gmp2p2) + SQRV(LVFU(cfa[rr + 3][(cc + 3) >> 1]) - o1_4)); + vfloat wtnw = onev / (epsv + SQRV(gmm2m2) + SQRV(LVFU(cfa[rr - 3][(cc - 3) >> 1]) - o1_1)); + vfloat wtne = onev / (epsv + SQRV(gmm2p2) + SQRV(LVFU(cfa[rr - 3][(cc + 3) >> 1]) - o1_2)); + vfloat wtsw = onev / (epsv + SQRV(gmp2m2) + SQRV(LVFU(cfa[rr + 3][(cc - 3) >> 1]) - o1_3)); - if ( ((ginterp - gin) < thresh * (ginterp + gin)) ) { - rawData[rr][cc] = 0.5f * (ginterp + gin); - //counter++; + vfloat ginterp = (gse * wtse + gnw * wtnw + gne * wtne + gsw * wtsw) / (wtse + wtnw + wtne + wtsw); + + vfloat val = vself(vmaskf_lt(ginterp - gin, threshv * (ginterp + gin)), zd5v * (ginterp + gin), gin); + val = vself(mask1, val, gin); + STC2VFU(rawData[rr][cc], val); } - } - // } +#endif + + for (; cc < width - 6; cc += 2) { + //neighbour checking code from Manuel Llorens Garcia + float o1_1 = cfa[rr - 1][(cc - 1) >> 1]; + float o1_2 = cfa[rr - 1][(cc + 1) >> 1]; + float o1_3 = cfa[rr + 1][(cc - 1) >> 1]; + float o1_4 = cfa[rr + 1][(cc + 1) >> 1]; + float o2_1 = cfa[rr - 2][cc >> 1]; + float o2_2 = cfa[rr + 2][cc >> 1]; + float o2_3 = cfa[rr][(cc - 2) >> 1]; + float o2_4 = cfa[rr][(cc + 2) >> 1]; + + float d1 = (o1_1 + o1_2) + (o1_3 + o1_4); + float d2 = (o2_1 + o2_2) + (o2_3 + o2_4); + + float c1 = (fabs(o1_1 - o1_2) + fabs(o1_1 - o1_3) + fabs(o1_1 - o1_4) + fabs(o1_2 - o1_3) + fabs(o1_3 - o1_4) + fabs(o1_2 - o1_4)); + float c2 = (fabs(o2_1 - o2_2) + fabs(o2_1 - o2_3) + fabs(o2_1 - o2_4) + fabs(o2_2 - o2_3) + fabs(o2_3 - o2_4) + fabs(o2_2 - o2_4)); + + if (c1 + c2 < thresh6 * fabs(d1 - d2)) { + //pixel interpolation + float gin = cfa[rr][cc >> 1]; + + float gmp2p2 = gin - cfa[rr + 2][(cc + 2) >> 1]; + float gmm2m2 = gin - cfa[rr - 2][(cc - 2) >> 1]; + float gmm2p2 = gin - cfa[rr - 2][(cc + 2) >> 1]; + float gmp2m2 = gin - cfa[rr + 2][(cc - 2) >> 1]; + + float gse = o1_4 + 0.5f * gmp2p2; + float gnw = o1_1 + 0.5f * gmm2m2; + float gne = o1_2 + 0.5f * gmm2p2; + float gsw = o1_3 + 0.5f * gmp2m2; + + float wtse = 1.f / (eps + SQR(gmp2p2) + SQR(cfa[rr + 3][(cc + 3) >> 1] - o1_4)); + float wtnw = 1.f / (eps + SQR(gmm2m2) + SQR(cfa[rr - 3][(cc - 3) >> 1] - o1_1)); + float wtne = 1.f / (eps + SQR(gmm2p2) + SQR(cfa[rr - 3][(cc + 3) >> 1] - o1_2)); + float wtsw = 1.f / (eps + SQR(gmp2m2) + SQR(cfa[rr + 3][(cc - 3) >> 1] - o1_3)); + + float ginterp = (gse * wtse + gnw * wtnw + gne * wtne + gsw * wtsw) / (wtse + wtnw + wtne + wtsw); + + if (ginterp - gin < thresh * (ginterp + gin)) { + rawData[rr][cc] = 0.5f * (ginterp + gin); + } + } + } } - - //printf("pixfix count= %d; vtest= %d \n",counter,vtest); - // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - // done - /*t2 = clock(); - dt = ((double)(t2-t1)) / CLOCKS_PER_SEC; - if (verbose) { - fprintf(stderr,_("elapsed time = %5.3fs\n"),dt); - }*/ - - + } } } - -#undef TS diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 148d17ee9..77710229a 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -1913,42 +1913,16 @@ void RawImageSource::preprocess (const RAWParams &raw, const LensProfParams &le } } - // check if it is an olympus E camera, if yes, compute G channel pre-compensation factors + // check if it is an olympus E camera or green equilibration is enabled. If yes, compute G channel pre-compensation factors if ( ri->getSensorType() == ST_BAYER && (raw.bayersensor.greenthresh || (((idata->getMake().size() >= 7 && idata->getMake().substr(0, 7) == "OLYMPUS" && idata->getModel()[0] == 'E') || (idata->getMake().size() >= 9 && idata->getMake().substr(0, 9) == "Panasonic")) && raw.bayersensor.method != RAWParams::BayerSensor::methodstring[ RAWParams::BayerSensor::vng4])) ) { // global correction - int ng1 = 0, ng2 = 0, i = 0; - double avgg1 = 0., avgg2 = 0.; - -#ifdef _OPENMP - #pragma omp parallel for default(shared) private(i) reduction(+: ng1, ng2, avgg1, avgg2) -#endif - - for (i = border; i < H - border; i++) - for (int j = border; j < W - border; j++) - if (ri->ISGREEN(i, j)) { - if (i & 1) { - avgg2 += rawData[i][j]; - ng2++; - } else { - avgg1 += rawData[i][j]; - ng1++; - } - } - - double corrg1 = ((double)avgg1 / ng1 + (double)avgg2 / ng2) / 2.0 / ((double)avgg1 / ng1); - double corrg2 = ((double)avgg1 / ng1 + (double)avgg2 / ng2) / 2.0 / ((double)avgg2 / ng2); - -#ifdef _OPENMP - #pragma omp parallel for default(shared) -#endif - - for (int i = border; i < H - border; i++) - for (int j = border; j < W - border; j++) - if (ri->ISGREEN(i, j)) { - float currData; - currData = (float)(rawData[i][j] * ((i & 1) ? corrg2 : corrg1)); - rawData[i][j] = (currData); - } + if(numFrames == 4) { + for(int i = 0; i < 4; ++i) { + green_equilibrate_global(*rawDataFrames[i]); + } + } else { + green_equilibrate_global(rawData); + } } if ( ri->getSensorType() == ST_BAYER && raw.bayersensor.greenthresh > 0) { @@ -1959,10 +1933,10 @@ void RawImageSource::preprocess (const RAWParams &raw, const LensProfParams &le if(numFrames == 4) { for(int i = 0; i < 4; ++i) { - green_equilibrate(0.01 * (raw.bayersensor.greenthresh), *rawDataFrames[i]); + green_equilibrate(0.01 * raw.bayersensor.greenthresh, *rawDataFrames[i]); } } else { - green_equilibrate(0.01 * (raw.bayersensor.greenthresh), rawData); + green_equilibrate(0.01 * raw.bayersensor.greenthresh, rawData); } } diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index bc2589408..8ce27f289 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -233,6 +233,7 @@ protected: void cfa_linedn (float linenoiselevel);//Emil's line denoise + void green_equilibrate_global (array2D &rawData); void green_equilibrate (float greenthresh, array2D &rawData);//Emil's green equilibration void nodemosaic(bool bw);