diff --git a/rtengine/color.cc b/rtengine/color.cc index c2bdcc9dd..d671d9baa 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -595,6 +595,15 @@ void Color::xyz2rgb (float x, float y, float z, float &r, float &g, float &b, co b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; } +#ifdef __SSE2__ +void Color::xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]) +{ + r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; + g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ; + b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; +} +#endif // __SSE2__ + void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb) { // correct gamma for black and white image : pseudo TRC curve of ICC profil diff --git a/rtengine/color.h b/rtengine/color.h index 9601ec3f6..493597a33 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -293,6 +293,9 @@ public: */ static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const double rgb_xyz[3][3]); static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3]); +#ifdef __SSE2__ + static void xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]); +#endif /** diff --git a/rtengine/curves.cc b/rtengine/curves.cc index 9997b38d8..ba1be089a 100644 --- a/rtengine/curves.cc +++ b/rtengine/curves.cc @@ -505,13 +505,9 @@ void CurveFactory::curveCL ( bool & clcutili, const std::vector& clcurve void CurveFactory::curveDehaContL ( bool & dehacontlutili, const std::vector& dehaclcurvePoints, LUTf & dehaclCurve, int skip) { - bool needed; + bool needed = false; DiagonalCurve* dCurve = NULL; - bool histNeededCL = false; - - needed = false; - if (!dehaclcurvePoints.empty() && dehaclcurvePoints[0] != 0) { dCurve = new DiagonalCurve (dehaclcurvePoints, CURVES_MIN_POLY_POINTS / skip); @@ -521,11 +517,6 @@ void CurveFactory::curveDehaContL ( bool & dehacontlutili, const std::vector& wavclcurvePoints, LUTf & wavclCurve, /*LUTu & histogramwavcl, LUTu & outBeforeWavCLurveHistogram,*/int skip) { diff --git a/rtengine/ipdehaz.cc b/rtengine/ipdehaz.cc index 1c5c64afa..66e3789f5 100644 --- a/rtengine/ipdehaz.cc +++ b/rtengine/ipdehaz.cc @@ -1,38 +1,33 @@ - /* - * This file is part of RawTherapee. - * - * Copyright (c) 2004-2010 Gabor Horvath - * - * RawTherapee is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * RawTherapee is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with RawTherapee. If not, see . - - * D. J. Jobson, Z. Rahman, and G. A. Woodell. A multi-scale - * Retinex for bridging the gap between color images and the - * human observation of scenes. IEEE Transactions on Image Processing, - * 1997, 6(7): 965-976 - * inspired from 2003 Fabien Pelisson - - */ - - +/* +* This file is part of RawTherapee. +* +* Copyright (c) 2004-2010 Gabor Horvath +* +* RawTherapee is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* RawTherapee is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with RawTherapee. If not, see . + * D. J. Jobson, Z. Rahman, and G. A. Woodell. A multi-scale + * Retinex for bridging the gap between color images and the + * human observation of scenes. IEEE Transactions on Image Processing, + * 1997, 6(7): 965-976 + * inspired from 2003 Fabien Pelisson +*/ #include #include -#include -#include -#include +#include +#include #include "rtengine.h" #include "gauss.h" #include "rawimagesource.h" @@ -40,228 +35,190 @@ #include "opthelper.h" #include "StopWatch.h" #define MAX_DEHAZE_SCALES 6 -#define clipdehaz( val, minv, maxv ) (( val = (val < minv ? minv : val ) ) > maxv ? maxv : val ) - - namespace rtengine +#define clipdehaz( val, minv, maxv ) (( val = (val < minv ? minv : val ) ) > maxv ? maxv : val ) + +namespace rtengine { extern const Settings* settings; -static float DehazeScales[MAX_DEHAZE_SCALES]; +static float DehazeScales[MAX_DEHAZE_SCALES]; -void dehaze_scales( float* scales, int nscales, int mode, int s) - { - if ( nscales == 1 ) - { - scales[0] = (float)s / 2.f; - } - else if (nscales == 2) - { - scales[0] = (float) s / 2.f; - scales[1] = (float) s; - } - else - { - float size_step = (float) s / (float) nscales; - - if(mode==0) { - for (int i = 0; i < nscales; ++i ) +void dehaze_scales( float* scales, int nscales, int mode, int s) +{ + if ( nscales == 1 ) { + scales[0] = (float)s / 2.f; + } else if (nscales == 2) { + scales[0] = (float) s / 2.f; + scales[1] = (float) s; + } else { + float size_step = (float) s / (float) nscales; + + if (mode == 0) { + for (int i = 0; i < nscales; ++i ) { scales[i] = 2.0f + (float)i * size_step; } - else if (mode==1) { + } else if (mode == 1) { size_step = (float)log(s - 2.0f) / (float) nscales; - for (int i = 0; i < nscales; ++i ) + + for (int i = 0; i < nscales; ++i ) { scales[i] = 2.0f + (float)pow (10.f, (i * size_step) / log (10.f)); } - else if(mode==2){ + } else if (mode == 2) { size_step = (float) log(s - 2.0f) / (float) nscales; - for ( int i = 0; i < nscales; ++i ) + + for ( int i = 0; i < nscales; ++i ) { scales[i] = s - (float)pow (10.f, (i * size_step) / log (10.f)); } + } } } -void mean_stddv( float **dst, float &mean, float &stddv, int W_L, int H_L ) - { - float vsquared = 0.f; - float sum = 0.f; -#pragma omp parallel for reduction(+:sum,vsquared) // this leads to differences, but parallel summation is more accurate - for (int i = 0; i L[i][j] + eps; - } + + for (int i = 0; i < H_L; i++ ) + for (int j = 0; j < W_L; j++) { + sum += dst[i][j]; + vsquared += (dst[i][j] * dst[i][j]); + } + + sum *= factor; + vsquared *= (factor * factor); + mean = sum / (float) (W_L * H_L); + vsquared /= (float) W_L * H_L; + stddv = ( vsquared - (mean * mean) ); + stddv = (float)sqrt(stddv); +} + +void RawImageSource::MSR(float** luminance, float** originalLuminance, int width, int height, DehazParams lcur) +{ + if (lcur.enabled) {//enabled + StopWatch Stop1("MSR"); + float mean, stddv; + float mini, delta, maxi; + float eps = 2.f; + float gain2 = (float) lcur.gain / 100.f; //def =1 not use + float offse = (float) lcur.offs; //def = 0 not use + int scal = lcur.scal; //def=3 + int nei = (int) 2.5f * lcur.neigh; //def = 200 + float vart = (float)lcur.vart / 100.f;//variance + float strength = (float) lcur.str / 100.f; // Blend with original L channel data + + int modedehaz = 0; // default to 0 ( lcur.dehazmet == "uni" ) + + if (lcur.dehazmet == "low") { + modedehaz = 1; + } + + if (lcur.dehazmet == "high") { + modedehaz = 2; } dehaze_scales( DehazeScales, scal, modedehaz, nei ); - - pond = 1.0f / (float) scal; - for ( int scale = 0; scale < scal; scale++ ) - { + int H_L = height; + int W_L = width; + + float *src[H_L] ALIGNED16; + float *srcBuffer = new float[H_L * W_L]; + + for (int i = 0; i < H_L; i++) { + src[i] = &srcBuffer[i * W_L]; + } + #ifdef _OPENMP -#pragma omp parallel -#endif - { - AlignedBufferMP* pBuffer = new AlignedBufferMP (max(W_L, H_L)); - gaussHorizontal (src, out, *pBuffer, W_L, H_L, DehazeScales[scale]); - gaussVertical (out, out, *pBuffer,W_L, H_L, DehazeScales[scale]); - delete pBuffer; - } -#ifdef __SSE2__ -#ifdef _OPENMP -#pragma omp parallel -{ - vfloat pondv = F2V(pond); -#pragma omp for -#endif - for ( int i=0; i < H_L; i++) { - int j; - - // for (j=0; j < W_L-3; j+=4) - { - - // _mm_storeu_ps(&dst[i][j], LVFU(dst[i][j]) + pondv * ( xlogf(LVFU(src[i][j])/LVFU(out[i][j])) )); - } - - // for (;j < W_L; j++) - for (int j=0;j < W_L; j++) - - { - float limds =(src[i][j])/out[i][j]; - if(limds > 10000.f) limds=10000.f; - if(limds < 0.0001f) limds=0.0001f; - - // dst[i][j] += pond * ( xlogf((src[i][j])/out[i][j]) ); - dst[i][j] += pond * ( xlogf((limds) )); - - } - } -} -#else -#ifdef _OPENMP -#pragma omp parallel for -#endif - for ( int i=0; i < H_L; i++) - for (int j=0; j < W_L; j++) - { - float limds =(src[i][j])/out[i][j]; - if(limds > 10000.f) limds=10000.f; - if(limds < 0.0001f) limds=0.0001f; - - dst[i][j] += pond * ( xlogf((src[i][j])/out[i][j]) ); - } + #pragma omp parallel for #endif + + for (int i = 0; i < H_L; i++) + for (int j = 0; j < W_L; j++) { + src[i][j] = luminance[i][j] + eps; + luminance[i][j] = 0.f; } - for (int i = 0; i < H_L; i++) { - delete [] out[i]; - } - delete [] out; - for (int i = 0; i < H_L; i++) { - delete [] src[i]; - } - delete [] src; + float *out[H_L] ALIGNED16; + float *outBuffer = new float[H_L * W_L]; -float beta=16384.0f; -float logBetaGain = xlogf(beta) * gain; - -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i=0; i< H_L; i++ ) - for (int j=0; jL[i][j]=((1.f - strength)* lab->L[i][j] + strength * clipdehaz( cd, 0.f, 32768.f )); - } for (int i = 0; i < H_L; i++) { - delete [] dst[i]; + out[i] = &outBuffer[i * W_L]; } - delete [] dst; - } - } + + float pond = 1.0f / (float) scal; + +#ifdef _OPENMP + #pragma omp parallel +#endif + { + AlignedBufferMP* pBuffer = new AlignedBufferMP (max(W_L, H_L)); + + for ( int scale = 0; scale < scal; scale++ ) { + gaussHorizontal (src, out, *pBuffer, W_L, H_L, DehazeScales[scale]); + gaussVertical (out, out, *pBuffer, W_L, H_L, DehazeScales[scale]); +#ifdef __SSE2__ + vfloat pondv = F2V(pond); + vfloat limMinv = F2V(0.0001f); + vfloat limMaxv = F2V(10000.f); +#endif +#ifdef _OPENMP + #pragma omp for +#endif + + for (int i = 0; i < H_L; i++) + { + int j = 0; +#ifdef __SSE2__ + + for (; j < W_L - 3; j += 4) { + _mm_storeu_ps(&luminance[i][j], LVFU(luminance[i][j]) + pondv * xlogf(LIMV(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv) )); + } + +#endif + + for (; j < W_L; j++) { + luminance[i][j] += pond * xlogf(LIM(src[i][j] / out[i][j], 0.0001f, 10000.f)); + } + } + } + delete pBuffer; + } + + delete [] outBuffer; + delete [] srcBuffer; + + float logBetaGain = xlogf(16384.f); + + mean = 0.f; + stddv = 0.f; + mean_stddv( luminance, mean, stddv, W_L, H_L, logBetaGain); + + mini = mean - vart * stddv; + maxi = mean + vart * stddv; + delta = maxi - mini; + printf("maxi=%f mini=%f mean=%f std=%f delta=%f\n", maxi, mini, mean, stddv, delta); + + if ( !delta ) { + delta = 1.0f; + } + + float cdfactor = gain2 * 32768.f / delta; +#ifdef _OPENMP + #pragma omp parallel for +#endif + + for ( int i = 0; i < H_L; i ++ ) + for (int j = 0; j < W_L; j++) { + float cd = cdfactor * ( luminance[i][j] * logBetaGain - mini ) + offse; + luminance[i][j] = clipdehaz( cd, 0.f, 32768.f ) * strength + (1.f - strength) * originalLuminance[i][j]; + } + } +} } diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index a5d223aa0..596c8ab85 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -1786,10 +1786,10 @@ void RawImageSource::demosaic(const RAWParams &raw) } t2.set(); - + rgbSourceModified = false; - + if( settings->verbose ) { if (getSensorType() == ST_BAYER) { @@ -1802,79 +1802,119 @@ void RawImageSource::demosaic(const RAWParams &raw) void RawImageSource::dehaz(RAWParams raw, ColorManagementParams cmp, DehazParams lcur, LUTf & cdcurve, bool dehacontlutili) { - + MyTime t4, t5; t4.set(); + if(!rgbSourceModified) { if (settings->verbose) { - printf ("Applying DeHaze\n"); + printf ("Applying DeHaze\n"); } - + TMatrix wprof = iccStore->workingSpaceMatrix (cmp.working); TMatrix wiprof = iccStore->workingSpaceInverseMatrix (cmp.working); - - double wip[3][3] = { - {wiprof[0][0], wiprof[0][1], wiprof[0][2]}, - {wiprof[1][0], wiprof[1][1], wiprof[1][2]}, - {wiprof[2][0], wiprof[2][1], wiprof[2][2]} - }; - double wp[3][3] = { - {wprof[0][0], wprof[0][1], wprof[0][2]}, - {wprof[1][0], wprof[1][1], wprof[1][2]}, - {wprof[2][0], wprof[2][1], wprof[2][2]} - }; - LabImage * labdeha = new LabImage(W, H); + double wip[3][3] = { + {wiprof[0][0], wiprof[0][1], wiprof[0][2]}, + {wiprof[1][0], wiprof[1][1], wiprof[1][2]}, + {wiprof[2][0], wiprof[2][1], wiprof[2][2]} + }; - #pragma omp parallel for - for (int i = 0; i lab - Color::rgbxyz(red[i][j], green[i][j], blue[i][j], X, Y, Z, wp); - //convert Lab - Color::XYZ2Lab(X, Y, Z, L, aa, bb); - labdeha->L[i][j]=L; - // if(lcur.dehazmet !="none") { - if(dehacontlutili) labdeha->L[i][j]=cdcurve[L];//apply curve to equalize histogram - // } - labdeha->a[i][j]=aa; - labdeha->b[i][j]=bb; - - } - - MSR(labdeha, W, H, 1, lcur); + double wp[3][3] = { + {wprof[0][0], wprof[0][1], wprof[0][2]}, + {wprof[1][0], wprof[1][1], wprof[1][2]}, + {wprof[2][0], wprof[2][1], wprof[2][2]} + }; + LabImage * labdeha = new LabImage(W, H); + + // We need a buffer with original L data to allow correct blending + float *labTmp[H] ALIGNED16; + float *labTmpBuffer = new float[H * W]; + for (int i = 0; i < H; i++) { + labTmp[i] = &labTmpBuffer[i * W]; + } - #pragma omp parallel for - for (int i = 0; i L[i][j]; - a2=labdeha->a[i][j]; - b2=labdeha->b[i][j]; - Color::Lab2XYZ(L2, a2, b2, x_, y_, z_) ; - Color::xyz2rgb(x_, y_, z_, R, G, B, wip); - red[i][j]=R; - green[i][j]=G; - blue[i][j]=B; - } - - delete labdeha; - - t5.set(); - + // Conversion rgb -> lab is hard to vectorize because it uses a lut (that's not the main problem) + // and it uses a condition inside XYZ2Lab which is almost impossible to vectorize without making it slower... +#ifdef _OPENMP + #pragma omp parallel for +#endif + for (int i = 0; i < H; i++ ) + for (int j = 0; j < W; j++) { + float X, Y, Z, L, aa, bb; + //rgb=>lab + Color::rgbxyz(red[i][j], green[i][j], blue[i][j], X, Y, Z, wp); + //convert Lab + Color::XYZ2Lab(X, Y, Z, L, aa, bb); + labTmp[i][j] = L; + if(dehacontlutili) { + L = cdcurve[L]; //apply curve to equalize histogram + } + labdeha->L[i][j] = L; + labdeha->a[i][j] = aa; + labdeha->b[i][j] = bb; + } - if( settings->verbose ) { - printf("Dehaz=%d usec\n", t5.etime(t4)); - - } - rgbSourceModified = true; + MSR(labdeha->L, labTmp, W, H, lcur); + + delete [] labTmpBuffer; + +#ifdef __SSE2__ + vfloat wipv[3][3]; + + for(int i = 0; i < 3; i++) + for(int j = 0; j < 3; j++) { + wipv[i][j] = F2V(wiprof[i][j]); + } + +#endif // __SSE2__ +#ifdef _OPENMP + #pragma omp parallel for +#endif + for (int i = 0; i < H; i++ ) { + int j = 0; +#ifdef __SSE2__ + + for (; j < W - 3; j += 4) { + vfloat L2, a2, b2, x_, y_, z_; + vfloat R, G, B; + L2 = LVFU(labdeha->L[i][j]); + a2 = LVFU(labdeha->a[i][j]); + b2 = LVFU(labdeha->b[i][j]); + Color::Lab2XYZ(L2, a2, b2, x_, y_, z_) ; + Color::xyz2rgb(x_, y_, z_, R, G, B, wipv); + _mm_storeu_ps(&red[i][j], R); + _mm_storeu_ps(&green[i][j], G); + _mm_storeu_ps(&blue[i][j], B); + } +#endif + for (; j < W; j++) { + float L2, a2, b2, x_, y_, z_; + float R, G, B; + L2 = labdeha->L[i][j]; + a2 = labdeha->a[i][j]; + b2 = labdeha->b[i][j]; + Color::Lab2XYZ(L2, a2, b2, x_, y_, z_) ; + Color::xyz2rgb(x_, y_, z_, R, G, B, wip); + red[i][j] = R; + green[i][j] = G; + blue[i][j] = B; + } + } + + delete labdeha; + + t5.set(); + + if( settings->verbose ) { + printf("Dehaz=%d usec\n", t5.etime(t4)); + } + + rgbSourceModified = true; } } - void RawImageSource::flushRawData() { if(cache) { @@ -1909,11 +1949,12 @@ void RawImageSource::HLRecovery_Global(ToneCurveParams hrp) if (settings->verbose) { printf ("Applying Highlight Recovery: Color propagation...\n"); } + HLRecovery_inpaint (red, green, blue); rgbSourceModified = true; } } - + } diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index 5a58d935e..2cd472de5 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -227,7 +227,7 @@ public: void boxblur2(float** src, float** dst, int H, int W, int box ); void boxblur_resamp(float **src, float **dst, int H, int W, int box, int samp ); - void MSR(LabImage* lab, int width, int height, int skip, DehazParams lcur); + void MSR(float** luminance, float **originalLuminance, int width, int height, DehazParams lcur); //void boxblur_resamp(float **red, float **green, float **blue, int H, int W, float thresh[3], float max[3], // multi_array2D & hfsize, multi_array2D & hilite, int box ); diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 628c86dd8..dbc289276 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -115,14 +115,14 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p if (pl) { pl->setProgress (0.30); } - LUTf cdcurve (65536, 0); - - bool dehacontlutili=false; - CurveFactory::curveDehaContL (dehacontlutili, params.dehaz.cdcurve, cdcurve, 1); - if(params.dehaz.dehazmet!="none") - imgsrc->dehaz( params.raw, params.icm, params.dehaz, cdcurve, dehacontlutili );//enabled Dehaze - + if(params.dehaz.enabled) { //enabled Dehaze + LUTf cdcurve (65536, 0); + + bool dehacontlutili=false; + CurveFactory::curveDehaContL (dehacontlutili, params.dehaz.cdcurve, cdcurve, 1); + imgsrc->dehaz( params.raw, params.icm, params.dehaz, cdcurve, dehacontlutili ); + } if (pl) { pl->setProgress (0.40); diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index ff14758b7..7dc1e57ea 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -1029,6 +1029,10 @@ void ParamsEdited::combine (rtengine::procparams::ProcParams& toEdit, const rten toEdit.dehaz.transmissionCurve = mods.dehaz.transmissionCurve; } + if (dehaz.dehazmet) { + toEdit.dehaz.dehazmet = mods.dehaz.dehazmet; + } + if (dehaz.str) { toEdit.dehaz.str = mods.dehaz.str; }