diff --git a/.travis.yml.fixme b/.travis.yml.fixme deleted file mode 100644 index 0aa85f3b4..000000000 --- a/.travis.yml.fixme +++ /dev/null @@ -1,44 +0,0 @@ -sudo: required -dist: trusty - -language: cpp - -compiler: - - gcc - -os: - - linux - -#branches: -# only: -# - master - -notifications: - irc: - channels: - - "chat.freenode.net#rawtherapee" - skip_join: true - template: - - "%{repository}/%{branch} (%{commit} - %{author}): %{build_url}: %{message}" - email: - on_success: change - on_failure: always - -env: - global: - - OMP_NUM_THREADS=4 - -before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ xenial main" - - sudo apt-get -qq update - - sudo apt-get install gcc-6 g++-6 - - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 - - sudo apt-get install build-essential cmake curl git libbz2-dev libcanberra-gtk3-dev libexiv2-dev libexpat-dev libfftw3-dev libglibmm-2.4-dev libgtk-3-dev libgtkmm-3.0-dev libiptcdata0-dev libjpeg8-dev liblcms2-dev libpng12-dev libsigc++-2.0-dev libtiff5-dev zlib1g-dev - -before_script: - - mkdir build - - cd build - - cmake -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" -DWITH_LTO="OFF" -DPROC_TARGET_NUMBER="2" .. - -script: make diff --git a/rtdata/languages/default b/rtdata/languages/default index f85fdd312..585e4aeca 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -744,6 +744,7 @@ HISTORY_MSG_COLORTONING_LABREGION_SHOWMASK;CT - region show mask HISTORY_MSG_COLORTONING_LABREGION_SLOPE;CT - region slope HISTORY_MSG_DEHAZE_DEPTH;Dehaze - Depth HISTORY_MSG_DEHAZE_ENABLED;Haze Removal +HISTORY_MSG_DEHAZE_LUMINANCE;Dehaze - Luminance only HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP;Dehaze - Show depth map HISTORY_MSG_DEHAZE_STRENGTH;Dehaze - Strength HISTORY_MSG_DUALDEMOSAIC_AUTO_CONTRAST;Dual demosaic - Auto threshold @@ -770,6 +771,7 @@ HISTORY_MSG_PDSHARPEN_AUTO_RADIUS;CAS - Auto radius HISTORY_MSG_PDSHARPEN_GAMMA;CAS - Gamma HISTORY_MSG_PDSHARPEN_ITERATIONS;CAS - Iterations HISTORY_MSG_PDSHARPEN_RADIUS;CAS - Radius +HISTORY_MSG_PDSHARPEN_RADIUS_BOOST;CAS - Corner radius boost HISTORY_MSG_PIXELSHIFT_DEMOSAIC;PS - Demosaic method for motion HISTORY_MSG_PREPROCESS_LINEDENOISE_DIRECTION;Line noise filter direction HISTORY_MSG_PREPROCESS_PDAFLINESFILTER;PDAF lines filter @@ -1548,6 +1550,7 @@ TP_DEFRINGE_RADIUS;Radius TP_DEFRINGE_THRESHOLD;Threshold TP_DEHAZE_DEPTH;Depth TP_DEHAZE_LABEL;Haze Removal +TP_DEHAZE_LUMINANCE;Luminance only TP_DEHAZE_SHOW_DEPTH_MAP;Show depth map TP_DEHAZE_STRENGTH;Strength TP_DIRPYRDENOISE_CHROMINANCE_AMZ;Auto multi-zones @@ -1811,7 +1814,6 @@ TP_PCVIGNETTE_ROUNDNESS_TOOLTIP;Roundness:\n0 = rectangle,\n50 = fitted ellipse, TP_PCVIGNETTE_STRENGTH;Strength TP_PCVIGNETTE_STRENGTH_TOOLTIP;Filter strength in stops (reached in corners). TP_PDSHARPENING_LABEL;Capture Sharpening -TP_PDSHARPENING_AUTORADIUS_TOOLTIP;If the checkbox is checked, RawTherapee calculates a value based on the raw data of the image. TP_PERSPECTIVE_HORIZONTAL;Horizontal TP_PERSPECTIVE_LABEL;Perspective TP_PERSPECTIVE_VERTICAL;Vertical @@ -2047,7 +2049,7 @@ TP_SHARPENING_LABEL;Sharpening TP_SHARPENING_METHOD;Method TP_SHARPENING_ONLYEDGES;Sharpen only edges TP_SHARPENING_RADIUS;Radius -TP_SHARPENING_RADIUS_OFFSET;Radius corner offset +TP_SHARPENING_RADIUS_BOOST;Corner radius boost TP_SHARPENING_RLD;RL Deconvolution TP_SHARPENING_RLD_AMOUNT;Amount TP_SHARPENING_RLD_DAMPING;Damping diff --git a/rtdata/rawtherapee.desktop.in b/rtdata/rawtherapee.desktop.in index c6c675c4d..b059e7d6a 100644 --- a/rtdata/rawtherapee.desktop.in +++ b/rtdata/rawtherapee.desktop.in @@ -1,6 +1,6 @@ [Desktop Entry] Type=Application -Version=1.1 +Version=1.0 Name=RawTherapee GenericName=Raw Photo Editor GenericName[cs]=Editor raw obrázků diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index da302964b..27aa9d2fc 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -20,12 +20,14 @@ #define _BOXBLUR_H_ #include +#include #include #include #include #include "alignedbuffer.h" #include "rt_math.h" #include "opthelper.h" +#include "StopWatch.h" namespace rtengine @@ -204,15 +206,15 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, tempv = tempv / lenv; temp1v = temp1v / lenv; - STVFU( dst[0][col], tempv); - STVFU( dst[0][col + 4], temp1v); + STVFU(dst[0][col], tempv); + STVFU(dst[0][col + 4], temp1v); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); lenv = lenp1v; } @@ -221,16 +223,16 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); lenv = lenm1v; } } @@ -312,6 +314,223 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, } +inline void boxblur (float** src, float** dst, int radius, int W, int H, bool multiThread) +{ + //box blur using rowbuffers and linebuffers instead of a full size buffer + + if (radius == 0) { + if (src != dst) { +#ifdef _OPENMP + #pragma omp parallel for if (multiThread) +#endif + + for (int row = 0; row < H; row++) { + for (int col = 0; col < W; col++) { + dst[row][col] = src[row][col]; + } + } + } + return; + } + + constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache +#ifdef _OPENMP + #pragma omp parallel if (multiThread) +#endif + { + std::unique_ptr buffer(new float[std::max(W, 8 * H)]); + + //horizontal blur + float* const lineBuffer = buffer.get(); +#ifdef _OPENMP + #pragma omp for +#endif + for (int row = 0; row < H; row++) { + float len = radius + 1; + float tempval = src[row][0]; + lineBuffer[0] = tempval; + for (int j = 1; j <= radius; j++) { + tempval += src[row][j]; + } + + tempval /= len; + dst[row][0] = tempval; + + for (int col = 1; col <= radius; col++) { + lineBuffer[col] = src[row][col]; + tempval = (tempval * len + src[row][col + radius]) / (len + 1); + dst[row][col] = tempval; + ++len; + } + + for (int col = radius + 1; col < W - radius; col++) { + lineBuffer[col] = src[row][col]; + dst[row][col] = tempval = tempval + (src[row][col + radius] - lineBuffer[col - radius - 1]) / len; + } + + for (int col = W - radius; col < W; col++) { + dst[row][col] = tempval = (tempval * len - lineBuffer[col - radius - 1]) / (len - 1); + --len; + } + } + + //vertical blur +#ifdef __SSE2__ + vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get(); + const vfloat leninitv = F2V(radius + 1); + const vfloat onev = F2V(1.f); + vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; + +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - 7; col += 8) { + lenv = leninitv; + tempv = LVFU(dst[0][col]); + temp1v = LVFU(dst[0][col + 4]); + rowBuffer[0][0] = tempv; + rowBuffer[0][1] = temp1v; + + for (int i = 1; i <= radius; i++) { + tempv = tempv + LVFU(dst[i][col]); + temp1v = temp1v + LVFU(dst[i][col + 4]); + } + + tempv = tempv / lenv; + temp1v = temp1v / lenv; + STVFU(dst[0][col], tempv); + STVFU(dst[0][col + 4], temp1v); + + for (int row = 1; row <= radius; row++) { + rowBuffer[row][0] = LVFU(dst[row][col]); + rowBuffer[row][1] = LVFU(dst[row][col + 4]); + lenp1v = lenv + onev; + tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v; + temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenp1v; + } + + rlenv = onev / lenv; + + for (int row = radius + 1; row < H - radius; row++) { + rowBuffer[row][0] = LVFU(dst[row][col]); + rowBuffer[row][1] = LVFU(dst[row][col + 4]); + tempv = tempv + (LVFU(dst[row + radius][col]) - rowBuffer[row - radius - 1][0]) * rlenv ; + temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - rowBuffer[row - radius - 1][1]) * rlenv ; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + } + + for (int row = H - radius; row < H; row++) { + lenm1v = lenv - onev; + tempv = (tempv * lenv - rowBuffer[row - radius - 1][0]) / lenm1v; + temp1v = (temp1v * lenv - rowBuffer[row - radius - 1][1]) / lenm1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenm1v; + } + } + +#else + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - numCols + 1; col += 8) { + float len = radius + 1; + + for (int k = 0; k < numCols; k++) { + rowBuffer[0][k] = dst[0][col + k]; + } + + for (int i = 1; i <= radius; i++) { + for (int k = 0; k < numCols; k++) { + dst[0][col + k] += dst[i][col + k]; + } + } + + for(int k = 0; k < numCols; k++) { + dst[0][col + k] /= len; + } + + for (int row = 1; row <= radius; row++) { + for(int k = 0; k < numCols; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + + len ++; + } + + for (int row = radius + 1; row < H - radius; row++) { + for(int k = 0; k < numCols; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) / len; + } + } + + for (int row = H - radius; row < H; row++) { + for(int k = 0; k < numCols; k++) { + dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1); + } + + len --; + } + } + +#endif + //vertical blur, remaining columns +#ifdef _OPENMP + #pragma omp single +#endif + { + const int remaining = W % numCols; + + if (remaining > 0) { + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); + const int col = W - remaining; + + float len = radius + 1; + for(int k = 0; k < remaining; ++k) { + rowBuffer[0][k] = dst[0][col + k]; + } + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { + dst[0][col + k] += dst[row][col + k]; + } + } + for(int k = 0; k < remaining; ++k) { + dst[0][col + k] /= len; + } + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + len ++; + } + const float rlen = 1.f / len; + for (int row = radius + 1; row < H - radius; ++row) { + for(int k = 0; k < remaining; ++k) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) * rlen; + } + } + for (int row = H - radius; row < H; ++row) { + for(int k = 0; k < remaining; ++k) { + dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1); + } + len --; + } + } + } + } +} + template void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H) { //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) @@ -382,15 +601,15 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in tempv = tempv / lenv; temp1v = temp1v / lenv; - STVFU( dst[0 * W + col], tempv); - STVFU( dst[0 * W + col + 4], temp1v); + STVFU(dst[0 * W + col], tempv); + STVFU(dst[0 * W + col + 4], temp1v); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); lenv = lenp1v; } @@ -399,16 +618,16 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); lenv = lenm1v; } } @@ -422,12 +641,12 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in } tempv = tempv / lenv; - STVFU( dst[0 * W + col], tempv); + STVFU(dst[0 * W + col], tempv); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); lenv = lenp1v; } @@ -435,13 +654,13 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); lenv = lenm1v; } } diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index e08243713..6720d9abc 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -532,9 +532,9 @@ BENCHFUN constexpr int tileSize = 194; constexpr int border = 5; constexpr int fullTileSize = tileSize + 2 * border; - const float maxRadius = std::min(1.15f, sigma + sigmaCornerOffset); - const float maxDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f)); - const float distanceFactor = (maxRadius - sigma) / maxDistance; + const float cornerRadius = std::min(1.15f, sigma + sigmaCornerOffset); + const float cornerDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f)); + const float distanceFactor = (cornerRadius - sigma) / cornerDistance; double progress = startVal; const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H); @@ -581,14 +581,17 @@ BENCHFUN gauss5x5mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel5); } } else { - if (sigmaCornerOffset > 0.0) { - float lkernel7[7][7]; + if (sigmaCornerOffset != 0.0) { const float distance = sqrt(rtengine::SQR(i + tileSize / 2 - H / 2) + rtengine::SQR(j + tileSize / 2 - W / 2)); - compute7x7kernel(sigma + distanceFactor * distance, lkernel7); - for (int k = 0; k < iterations - 1; ++k) { - // apply 7x7 gaussian blur and divide luminance by result of gaussian blur - gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7); - gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7); + const float sigmaTile = sigma + distanceFactor * distance; + if (sigmaTile >= 0.4f) { + float lkernel7[7][7]; + compute7x7kernel(sigma + distanceFactor * distance, lkernel7); + for (int k = 0; k < iterations - 1; ++k) { + // apply 7x7 gaussian blur and divide luminance by result of gaussian blur + gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7); + gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7); + } } } else { for (int k = 0; k < iterations; ++k) { diff --git a/rtengine/color.h b/rtengine/color.h index b859fb0cf..97835ba10 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -210,6 +210,13 @@ public: return r * workingspace[1][0] + g * workingspace[1][1] + b * workingspace[1][2]; } +#ifdef __SSE2__ + static vfloat rgbLuminance(vfloat r, vfloat g, vfloat b, const vfloat workingspace[3]) + { + return r * workingspace[0] + g * workingspace[1] + b * workingspace[2]; + } +#endif + /** * @brief Convert red/green/blue to L*a*b * @brief Convert red/green/blue to hue/saturation/luminance diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index d2c68e2e8..ef209118b 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -2417,59 +2417,78 @@ void CLASS hasselblad_correct() void CLASS hasselblad_load_raw() { - struct jhead jh; - int shot, row, col, *back[5], len[2], diff[12], pred, sh, f, s, c; - unsigned upix, urow, ucol; - ushort *ip; + struct jhead jh; + int *back[5], diff[12]; - if (!ljpeg_start (&jh, 0)) return; - order = 0x4949; - ph1_bithuff_t ph1_bithuff(this, ifp, order); - hb_bits(-1); - back[4] = (int *) calloc (raw_width, 3*sizeof **back); - merror (back[4], "hasselblad_load_raw()"); - FORC3 back[c] = back[4] + c*raw_width; - cblack[6] >>= sh = tiff_samples > 1; - shot = LIM(shot_select, 1, tiff_samples) - 1; - for (row=0; row < raw_height; row++) { - FORC4 back[(c+3) & 3] = back[c]; - for (col=0; col < raw_width; col+=2) { - for (s=0; s < tiff_samples*2; s+=2) { - FORC(2) len[c] = ph1_huff(jh.huff[0]); - FORC(2) { - diff[s+c] = hb_bits(len[c]); - if ((diff[s+c] & (1 << (len[c]-1))) == 0) - diff[s+c] -= (1 << len[c]) - 1; - if (diff[s+c] == 65535) diff[s+c] = -32768; - } - } - for (s=col; s < col+2; s++) { - pred = 0x8000 + load_flags; - if (col) pred = back[2][s-2]; - if (col && row > 1) switch (jh.psv) { - case 11: pred += back[0][s]/2 - back[0][s-2]/2; break; - } - f = (row & 1)*3 ^ ((col+s) & 1); - FORC (tiff_samples) { - pred += diff[(s & 1)*tiff_samples+c]; - upix = pred >> sh & 0xffff; - if (raw_image && c == shot) - RAW(row,s) = upix; - if (image) { - urow = row-top_margin + (c & 1); - ucol = col-left_margin - ((c >> 1) & 1); - ip = &image[urow*width+ucol][f]; - if (urow < height && ucol < width) - *ip = c < 4 ? upix : (*ip + upix) >> 1; - } - } - back[2][s] = pred; - } + if (!ljpeg_start (&jh, 0)) { + return; + } + order = 0x4949; + ph1_bithuff_t ph1_bithuff(this, ifp, order); + hb_bits(-1); + back[4] = (int *) calloc(raw_width, 3 * sizeof **back); + merror(back[4], "hasselblad_load_raw()"); + for (int c = 0; c < 3; ++c) { + back[c] = back[4] + c * raw_width; + } + const int sh = tiff_samples > 1; + cblack[6] >>= sh; + const int shot = LIM(shot_select, 1, tiff_samples) - 1; + for (int row = 0; row < raw_height; ++row) { + for (int c = 0; c < 4; ++c) { + back[(c + 3) & 3] = back[c]; + } + for (int col = 0; col < raw_width; col += 2) { + for (int s = 0; s < tiff_samples * 2; s += 2) { + const int len[2]= { + static_cast(ph1_huff(jh.huff[0])), + static_cast(ph1_huff(jh.huff[0])) + }; + for (int c = 0; c < 2; ++c) { + diff[s + c] = hb_bits(len[c]); + if ((diff[s + c] & (1 << (len[c] - 1))) == 0) { + diff[s + c] -= (1 << len[c]) - 1; + } + if (diff[s + c] == 65535) { + diff[s + c] = -32768; + } + } + } + for (int s = col; s < col + 2; ++s) { + int pred; + if (col) { + pred = back[2][s - 2]; + if (row > 1 && jh.psv == 11) { + pred += back[0][s] / 2 - back[0][s - 2] / 2; + } + } else { + pred = 0x8000 + load_flags; + } + for (int c = 0; c < tiff_samples; ++c) { + pred += diff[(s & 1) * tiff_samples + c]; + const unsigned upix = pred >> sh & 0xffff; + if (raw_image && c == shot) { + RAW(row, s) = upix; + } + if (image) { + const int f = (row & 1) * 3 ^ ((col + s) & 1); + const unsigned urow = row - top_margin + (c & 1); + const unsigned ucol = col - left_margin - ((c >> 1) & 1); + ushort* const ip = &image[urow * width + ucol][f]; + if (urow < height && ucol < width) { + *ip = c < 4 ? upix : (*ip + upix) >> 1; + } + } + } + back[2][s] = pred; + } + } + } + free(back[4]); + ljpeg_end(&jh); + if (image) { + mix_green = 1; } - } - free (back[4]); - ljpeg_end (&jh); - if (image) mix_green = 1; } void CLASS leaf_hdr_load_raw() diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index 0ebe6c172..159e89504 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -3,6 +3,7 @@ * This file is part of RawTherapee. * * Copyright (c) 2018 Alberto Griggio + * Optimized 2019 Ingo Weyrich * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,9 +17,9 @@ * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . - */ +*/ -/** +/* * This is a Fast Guided Filter implementation, derived directly from the * pseudo-code of the paper: * @@ -26,32 +27,16 @@ * by Kaiming He, Jian Sun * * available at https://arxiv.org/abs/1505.00996 - */ +*/ #include "guidedfilter.h" #include "boxblur.h" #include "rescale.h" #include "imagefloat.h" - +#define BENCHMARK +#include "StopWatch.h" namespace rtengine { -#if 0 -# define DEBUG_DUMP(arr) \ - do { \ - Imagefloat im(arr.width(), arr.height()); \ - const char *out = "/tmp/" #arr ".tif"; \ - for (int y = 0; y < im.getHeight(); ++y) { \ - for (int x = 0; x < im.getWidth(); ++x) { \ - im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \ - } \ - } \ - im.saveTIFF(out, 16); \ - } while (false) -#else -# define DEBUG_DUMP(arr) -#endif - - namespace { int calculate_subsampling(int w, int h, int r) @@ -78,18 +63,10 @@ int calculate_subsampling(int w, int h, int r) void guidedFilter(const array2D &guide, const array2D &src, array2D &dst, int r, float epsilon, bool multithread, int subsampling) { - - const int W = src.width(); - const int H = src.height(); - - if (subsampling <= 0) { - subsampling = calculate_subsampling(W, H, r); - } - - enum Op { MUL, DIVEPSILON, ADD, SUB, ADDMUL, SUBMUL }; + enum Op {MUL, DIVEPSILON, SUBMUL}; const auto apply = - [=](Op op, array2D &res, const array2D &a, const array2D &b, const array2D &c=array2D()) -> void + [multithread, epsilon](Op op, array2D &res, const array2D &a, const array2D &b, const array2D &c=array2D()) -> void { const int w = res.width(); const int h = res.height(); @@ -99,137 +76,109 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 #endif for (int y = 0; y < h; ++y) { for (int x = 0; x < w; ++x) { - float r; - float aa = a[y][x]; - float bb = b[y][x]; switch (op) { - case MUL: - r = aa * bb; - break; - case DIVEPSILON: - r = aa / (bb + epsilon); - break; - case ADD: - r = aa + bb; - break; - case SUB: - r = aa - bb; - break; - case ADDMUL: - r = aa * bb + c[y][x]; - break; - case SUBMUL: - r = c[y][x] - (aa * bb); - break; - default: - assert(false); - r = 0; - break; + case MUL: + res[y][x] = a[y][x] * b[y][x]; + break; + case DIVEPSILON: + res[y][x] = a[y][x] / (b[y][x] + epsilon); // note: the value of epsilon intentionally has an impact on the result. It is not only to avoid divisions by zero + break; + case SUBMUL: + res[y][x] = c[y][x] - (a[y][x] * b[y][x]); + break; + default: + assert(false); + res[y][x] = 0; + break; } - res[y][x] = r; } } }; - // use the terminology of the paper (Algorithm 2) - const array2D &I = guide; - const array2D &p = src; - array2D &q = dst; - const auto f_subsample = - [=](array2D &d, const array2D &s) -> void + [multithread](array2D &d, const array2D &s) -> void { rescaleBilinear(s, d, multithread); }; - const auto f_upsample = f_subsample; - - const size_t w = W / subsampling; - const size_t h = H / subsampling; - - AlignedBuffer blur_buf(w * h); const auto f_mean = - [&](array2D &d, array2D &s, int rad) -> void + [multithread](array2D &d, array2D &s, int rad) -> void { rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1); - float **src = s; - float **dst = d; -#ifdef _OPENMP - #pragma omp parallel if (multithread) -#endif - boxblur(src, dst, blur_buf.data, rad, rad, s.width(), s.height()); + boxblur(s, d, rad, s.width(), s.height(), multithread); }; + const int W = src.width(); + const int H = src.height(); + + if (subsampling <= 0) { + subsampling = calculate_subsampling(W, H, r); + } + + const size_t w = W / subsampling; + const size_t h = H / subsampling; + const float r1 = float(r) / subsampling; + array2D I1(w, h); array2D p1(w, h); - f_subsample(I1, I); - f_subsample(p1, p); + f_subsample(I1, guide); - DEBUG_DUMP(I); - DEBUG_DUMP(p); - DEBUG_DUMP(I1); - DEBUG_DUMP(p1); + if (&guide == &src) { + f_mean(p1, I1, r1); - float r1 = float(r) / subsampling; + apply(MUL, I1, I1, I1); // I1 = I1 * I1 - array2D meanI(w, h); - f_mean(meanI, I1, r1); - DEBUG_DUMP(meanI); + f_mean(I1, I1, r1); - array2D meanp(w, h); - f_mean(meanp, p1, r1); - DEBUG_DUMP(meanp); + apply(SUBMUL, I1, p1, p1, I1); // I1 = I1 - p1 * p1 + apply(DIVEPSILON, I1, I1, I1); // I1 = I1 / (I1 + epsilon) + apply(SUBMUL, p1, I1, p1, p1); // p1 = p1 - I1 * p1 - array2D &corrIp = p1; - apply(MUL, corrIp, I1, p1); - f_mean(corrIp, corrIp, r1); - DEBUG_DUMP(corrIp); + } else { + f_subsample(p1, src); - array2D &corrI = I1; - apply(MUL, corrI, I1, I1); - f_mean(corrI, corrI, r1); - DEBUG_DUMP(corrI); + array2D meanI(w, h); + f_mean(meanI, I1, r1); - array2D &varI = corrI; - apply(SUBMUL, varI, meanI, meanI, corrI); - DEBUG_DUMP(varI); + array2D meanp(w, h); + f_mean(meanp, p1, r1); - array2D &covIp = corrIp; - apply(SUBMUL, covIp, meanI, meanp, corrIp); - DEBUG_DUMP(covIp); + apply(MUL, p1, I1, p1); - array2D &a = varI; - apply(DIVEPSILON, a, covIp, varI); - DEBUG_DUMP(a); + f_mean(p1, p1, r1); - array2D &b = covIp; - apply(SUBMUL, b, a, meanI, meanp); - DEBUG_DUMP(b); + apply(MUL, I1, I1, I1); - meanI.free(); // frees w * h * 4 byte - meanp.free(); // frees w * h * 4 byte + f_mean(I1, I1, r1); - array2D &meana = a; - f_mean(meana, a, r1); - DEBUG_DUMP(meana); + apply(SUBMUL, I1, meanI, meanI, I1); + apply(SUBMUL, p1, meanI, meanp, p1); + apply(DIVEPSILON, I1, p1, I1); + apply(SUBMUL, p1, I1, meanI, meanp); + } - array2D &meanb = b; - f_mean(meanb, b, r1); - DEBUG_DUMP(meanb); + f_mean(I1, I1, r1); + f_mean(p1, p1, r1); - blur_buf.resize(0); // frees w * h * 4 byte + const int Ws = I1.width(); + const int Hs = I1.height(); + const int Wd = dst.width(); + const int Hd = dst.height(); - array2D meanA(W, H); - f_upsample(meanA, meana); - DEBUG_DUMP(meanA); + const float col_scale = static_cast(Ws) / static_cast(Wd); + const float row_scale = static_cast(Hs) / static_cast(Hd); - array2D &meanB = q; - f_upsample(meanB, meanb); - DEBUG_DUMP(meanB); +#ifdef _OPENMP + #pragma omp parallel for if (multithread) +#endif - apply(ADDMUL, q, meanA, I, meanB); - DEBUG_DUMP(q); + for (int y = 0; y < Hd; ++y) { + const float ymrs = y * row_scale; + for (int x = 0; x < Wd; ++x) { + dst[y][x] = getBilinearValue(I1, x * col_scale, ymrs) * guide[y][x] + getBilinearValue(p1, x * col_scale, ymrs); + } + } } } // namespace rtengine diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 60d4cb9ff..e7bf71ba6 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . - */ +*/ /* * Haze removal using the algorithm described in the paper: @@ -26,15 +26,16 @@ * * using a guided filter for the "soft matting" of the transmission map * - */ +*/ +#include #include -#include +#include #include "guidedfilter.h" #include "improcfun.h" #include "procparams.h" -#include "rt_algo.h" +#include "rescale.h" #include "rt_math.h" extern Options options; @@ -43,24 +44,103 @@ namespace rtengine { namespace { -#if 0 -# define DEBUG_DUMP(arr) \ - do { \ - Imagefloat im(arr.width(), arr.height()); \ - const char *out = "/tmp/" #arr ".tif"; \ - for (int y = 0; y < im.getHeight(); ++y) { \ - for (int x = 0; x < im.getWidth(); ++x) { \ - im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \ - } \ - } \ - im.saveTIFF(out, 16); \ - } while (false) -#else -# define DEBUG_DUMP(arr) +float normalize(Imagefloat *rgb, bool multithread) +{ + float maxval = 0.f; + const int W = rgb->getWidth(); + const int H = rgb->getHeight(); +#ifdef _OPENMP + #pragma omp parallel for reduction(max:maxval) schedule(dynamic, 16) if (multithread) #endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + maxval = max(maxval, rgb->r(y, x), rgb->g(y, x), rgb->b(y, x)); + } + } + maxval = max(maxval * 2.f, 65535.f); +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) if (multithread) +#endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + rgb->r(y, x) /= maxval; + rgb->g(y, x) /= maxval; + rgb->b(y, x) /= maxval; + } + } + return maxval; +} +void restore(Imagefloat *rgb, float maxval, bool multithread) +{ + const int W = rgb->getWidth(); + const int H = rgb->getHeight(); + if (maxval > 0.f && maxval != 1.f) { +#ifdef _OPENMP +# pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + rgb->r(y, x) *= maxval; + rgb->g(y, x) *= maxval; + rgb->b(y, x) *= maxval; + } + } + } +} -int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread) +int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, const array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength) +{ + const int W = R.width(); + const int H = R.height(); + +#ifdef _OPENMP + #pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < H; y += patchsize) { + const int pH = min(y + patchsize, H); + for (int x = 0; x < W; x += patchsize) { + float minR = RT_INFINITY_F; + float minG = RT_INFINITY_F; + float minB = RT_INFINITY_F; +#ifdef __SSE2__ + vfloat minRv = F2V(minR); + vfloat minGv = F2V(minG); + vfloat minBv = F2V(minB); +#endif + const int pW = min(x + patchsize, W); + for (int yy = y; yy < pH; ++yy) { + int xx = x; +#ifdef __SSE2__ + for (; xx < pW - 3; xx += 4) { + minRv = vminf(minRv, LVFU(R[yy][xx])); + minGv = vminf(minGv, LVFU(G[yy][xx])); + minBv = vminf(minBv, LVFU(B[yy][xx])); + } +#endif + for (; xx < pW; ++xx) { + minR = min(minR, R[yy][xx]); + minG = min(minG, G[yy][xx]); + minB = min(minB, B[yy][xx]); + } + } +#ifdef __SSE2__ + minR = min(minR, vhmin(minRv)); + minG = min(minG, vhmin(minGv)); + minB = min(minB, vhmin(minBv)); +#endif + float val = min(minR / ambient[0], minG / ambient[1], minB / ambient[2]); + val = 1.f - strength * LIM01(val); + for (int yy = y; yy < pH; ++yy) { + std::fill(dst[yy] + x, dst[yy] + pW, val); + } + } + } + + return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); +} + +int get_dark_channel_downsized(const array2D &R, const array2D &G, const array2D &B, const array2D &dst, int patchsize, bool multithread) { const int W = R.width(); const int H = R.height(); @@ -73,22 +153,11 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr for (int x = 0; x < W; x += patchsize) { float val = RT_INFINITY_F; const int pW = min(x + patchsize, W); - for (int yy = y; yy < pH; ++yy) { - for (int xx = x; xx < pW; ++xx) { - float r = R[yy][xx]; - float g = G[yy][xx]; - float b = B[yy][xx]; - if (ambient) { - r /= ambient[0]; - g /= ambient[1]; - b /= ambient[2]; - } - val = min(val, r, g, b); + for (int xx = x; xx < pW; ++xx) { + for (int yy = y; yy < pH; ++yy) { + val = min(val, R[yy][xx], G[yy][xx], B[yy][xx]); } } - if (clip) { - val = LIM01(val); - } for (int yy = y; yy < pH; ++yy) { std::fill(dst[yy] + x, dst[yy] + pW, val); } @@ -98,33 +167,24 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); } - float estimate_ambient_light(const array2D &R, const array2D &G, const array2D &B, const array2D &dark, int patchsize, int npatches, float ambient[3]) { const int W = R.width(); const int H = R.height(); - const auto get_percentile = - [](std::priority_queue &q, float prcnt) -> float - { - size_t n = LIM(q.size() * prcnt, 1, q.size()); - while (q.size() > n) { - q.pop(); - } - return q.top(); - }; - float darklim = RT_INFINITY_F; { - std::priority_queue p; + std::vector p; for (int y = 0; y < H; y += patchsize) { for (int x = 0; x < W; x += patchsize) { if (!OOG(dark[y][x], 1.f - 1e-5f)) { - p.push(dark[y][x]); + p.push_back(dark[y][x]); } } } - darklim = get_percentile(p, 0.95); + const int pos = p.size() * 0.95; + std::nth_element(p.begin(), p.begin() + pos, p.end()); + darklim = p[pos]; } std::vector> patches; @@ -145,7 +205,8 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c float bright_lim = RT_INFINITY_F; { - std::priority_queue l; + std::vector l; + l.reserve(patches.size() * patchsize * patchsize); for (auto &p : patches) { const int pW = min(p.first+patchsize, W); @@ -153,12 +214,13 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c for (int y = p.second; y < pH; ++y) { for (int x = p.first; x < pW; ++x) { - l.push(R[y][x] + G[y][x] + B[y][x]); + l.push_back(R[y][x] + G[y][x] + B[y][x]); } } } - - bright_lim = get_percentile(l, 0.95); + const int pos = l.size() * 0.95; + std::nth_element(l.begin(), l.begin() + pos, l.end()); + bright_lim = l[pos]; } double rr = 0, gg = 0, bb = 0; @@ -190,7 +252,6 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c return darklim > 0 ? -1.125f * std::log(darklim) : std::log(std::numeric_limits::max()) / 2; } - void extract_channels(Imagefloat *img, array2D &r, array2D &g, array2D &b, int radius, float epsilon, bool multithread) { const int W = img->getWidth(); @@ -211,12 +272,12 @@ void extract_channels(Imagefloat *img, array2D &r, array2D &g, arr void ImProcFunctions::dehaze(Imagefloat *img) { - if (!params->dehaze.enabled) { + if (!params->dehaze.enabled || params->dehaze.strength == 0.0) { return; } - img->normalizeFloatTo1(); - + const float maxChannel = normalize(img, multiThread); + const int W = img->getWidth(); const int H = img->getHeight(); const float strength = LIM01(float(params->dehaze.strength) / 100.f * 0.9f); @@ -229,21 +290,47 @@ void ImProcFunctions::dehaze(Imagefloat *img) int patchsize = max(int(5 / scale), 2); float ambient[3]; - array2D &t_tilde = dark; - float max_t = 0.f; + float maxDistance = 0.f; { - int npatches = 0; - array2D R(W, H); + array2D& R = dark; // R and dark can safely use the same buffer, which is faster and reduces memory allocations/deallocations array2D G(W, H); array2D B(W, H); extract_channels(img, R, G, B, patchsize, 1e-1, multiThread); - - patchsize = max(max(W, H) / 600, 2); - npatches = get_dark_channel(R, G, B, dark, patchsize, nullptr, false, multiThread); - DEBUG_DUMP(dark); - max_t = estimate_ambient_light(R, G, B, dark, patchsize, npatches, ambient); + { + constexpr int sizecap = 200; + const float r = static_cast(W) / static_cast(H); + const int hh = r >= 1.f ? sizecap : sizecap / r; + const int ww = r >= 1.f ? sizecap * r : sizecap; + + if (W <= ww && H <= hh) { + // don't rescale small thumbs + array2D D(W, H); + const int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread); + maxDistance = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient); + } else { + array2D RR(ww, hh); + array2D GG(ww, hh); + array2D BB(ww, hh); + rescaleNearest(R, RR, multiThread); + rescaleNearest(G, GG, multiThread); + rescaleNearest(B, BB, multiThread); + array2D D(ww, hh); + + const int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread); + maxDistance = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient); + } + } + + if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { + if (options.rtSettings.verbose) { + std::cout << "dehaze: no haze detected" << std::endl; + } + restore(img, maxChannel, multiThread); + return; // probably no haze at all + } + patchsize = max(max(W, H) / 600, 2); if (options.rtSettings.verbose) { std::cout << "dehaze: ambient light is " @@ -251,78 +338,95 @@ void ImProcFunctions::dehaze(Imagefloat *img) << std::endl; } - get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread); - } - - if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { - if (options.rtSettings.verbose) { - std::cout << "dehaze: no haze detected" << std::endl; - } - img->normalizeFloatTo65535(); - return; // probably no haze at all - } - - DEBUG_DUMP(t_tilde); - -#ifdef _OPENMP - #pragma omp parallel for if (multiThread) -#endif - for (int y = 0; y < H; ++y) { - for (int x = 0; x < W; ++x) { - dark[y][x] = 1.f - strength * dark[y][x]; - } + get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread, strength); } const int radius = patchsize * 4; - const float epsilon = 1e-5; - array2D &t = t_tilde; + constexpr float epsilon = 1e-5f; - { - array2D guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE); - guidedFilter(guideB, t_tilde, t, radius, epsilon, multiThread); - } + array2D guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE); + guidedFilter(guideB, dark, dark, radius, epsilon, multiThread); - DEBUG_DUMP(t); - if (options.rtSettings.verbose) { - std::cout << "dehaze: max distance is " << max_t << std::endl; + std::cout << "dehaze: max distance is " << maxDistance << std::endl; } - float depth = -float(params->dehaze.depth) / 100.f; - const float t0 = max(1e-3f, std::exp(depth * max_t)); + const float depth = -float(params->dehaze.depth) / 100.f; + const float t0 = max(1e-3f, std::exp(depth * maxDistance)); const float teps = 1e-3f; + + const bool luminance = params->dehaze.luminance; + const TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); +#ifdef __SSE2__ + const vfloat wsv[3] = {F2V(ws[1][0]), F2V(ws[1][1]),F2V(ws[1][2])}; +#endif + const float ambientY = Color::rgbLuminance(ambient[0], ambient[1], ambient[2], ws); #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif for (int y = 0; y < H; ++y) { - for (int x = 0; x < W; ++x) { + int x = 0; +#ifdef __SSE2__ + const vfloat onev = F2V(1.f); + const vfloat ambient0v = F2V(ambient[0]); + const vfloat ambient1v = F2V(ambient[1]); + const vfloat ambient2v = F2V(ambient[2]); + const vfloat ambientYv = F2V(ambientY); + const vfloat epsYv = F2V(1e-5f); + const vfloat t0v = F2V(t0); + const vfloat tepsv = F2V(teps); + const vfloat cmaxChannelv = F2V(maxChannel); + for (; x < W - 3; x += 4) { // ensure that the transmission is such that to avoid clipping... - float rgb[3] = { img->r(y, x), img->g(y, x), img->b(y, x) }; + const vfloat r = LVFU(img->r(y, x)); + const vfloat g = LVFU(img->g(y, x)); + const vfloat b = LVFU(img->b(y, x)); // ... t >= tl to avoid negative values - float tl = 1.f - min(rgb[0]/ambient[0], rgb[1]/ambient[1], rgb[2]/ambient[2]); - // ... t >= tu to avoid values > 1 - float tu = t0 - teps; - for (int c = 0; c < 3; ++c) { - if (ambient[c] < 1) { - tu = max(tu, (rgb[c] - ambient[c])/(1.f - ambient[c])); - } - } - float mt = max(t[y][x], t0, tl + teps, tu + teps); + const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v)); + const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv + tepsv, t0v)); if (params->dehaze.showDepthMap) { - img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt); + const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * cmaxChannelv; + STVFU(img->r(y, x), valv); + STVFU(img->g(y, x), valv); + STVFU(img->b(y, x), valv); + } else if (luminance) { + const vfloat Yv = Color::rgbLuminance(r, g, b, wsv); + const vfloat YYv = (Yv - ambientYv) / mtv + ambientYv; + const vfloat fv = vself(vmaskf_gt(Yv, epsYv), cmaxChannelv * YYv / Yv, cmaxChannelv); + STVFU(img->r(y, x), r * fv); + STVFU(img->g(y, x), g * fv); + STVFU(img->b(y, x), b * fv); } else { - float r = (rgb[0] - ambient[0]) / mt + ambient[0]; - float g = (rgb[1] - ambient[1]) / mt + ambient[1]; - float b = (rgb[2] - ambient[2]) / mt + ambient[2]; - - img->r(y, x) = r; - img->g(y, x) = g; - img->b(y, x) = b; + STVFU(img->r(y, x), ((r - ambient0v) / mtv + ambient0v) * cmaxChannelv); + STVFU(img->g(y, x), ((g - ambient1v) / mtv + ambient1v) * cmaxChannelv); + STVFU(img->b(y, x), ((b - ambient2v) / mtv + ambient2v) * cmaxChannelv); + } + } +#endif + for (; x < W; ++x) { + // ensure that the transmission is such that to avoid clipping... + const float r = img->r(y, x); + const float g = img->g(y, x); + const float b = img->b(y, x); + // ... t >= tl to avoid negative values + const float tl = 1.f - min(r / ambient[0], g / ambient[1], b / ambient[2]); + const float mt = max(dark[y][x], t0, tl + teps); + if (params->dehaze.showDepthMap) { + img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * maxChannel; + } else if (luminance) { + const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws); + const float YY = (Y - ambientY) / mt + ambientY; + const float f = Y > 1e-5f ? maxChannel * YY / Y : maxChannel; + img->r(y, x) *= f; + img->g(y, x) *= f; + img->b(y, x) *= f; + } else { + img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * maxChannel; + img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * maxChannel; + img->b(y, x) = ((b - ambient[2]) / mt + ambient[2]) * maxChannel; } } } - - img->normalizeFloatTo65535(); } diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index bd91ae8e7..68960794e 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -2539,7 +2539,8 @@ DehazeParams::DehazeParams() : enabled(false), strength(50), showDepthMap(false), - depth(25) + depth(25), + luminance(false) { } @@ -2549,7 +2550,8 @@ bool DehazeParams::operator ==(const DehazeParams& other) const enabled == other.enabled && strength == other.strength && showDepthMap == other.showDepthMap - && depth == other.depth; + && depth == other.depth + && luminance == other.luminance; } bool DehazeParams::operator !=(const DehazeParams& other) const @@ -3260,6 +3262,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->dehaze.strength, "Dehaze", "Strength", dehaze.strength, keyFile); saveToKeyfile(!pedited || pedited->dehaze.showDepthMap, "Dehaze", "ShowDepthMap", dehaze.showDepthMap, keyFile); saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Depth", dehaze.depth, keyFile); + saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Luminance", dehaze.luminance, keyFile); // Directional pyramid denoising saveToKeyfile(!pedited || pedited->dirpyrDenoise.enabled, "Directional Pyramid Denoising", "Enabled", dirpyrDenoise.enabled, keyFile); @@ -4922,6 +4925,7 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited) assignFromKeyfile(keyFile, "Dehaze", "Strength", pedited, dehaze.strength, pedited->dehaze.strength); assignFromKeyfile(keyFile, "Dehaze", "ShowDepthMap", pedited, dehaze.showDepthMap, pedited->dehaze.showDepthMap); assignFromKeyfile(keyFile, "Dehaze", "Depth", pedited, dehaze.depth, pedited->dehaze.depth); + assignFromKeyfile(keyFile, "Dehaze", "Luminance", pedited, dehaze.luminance, pedited->dehaze.luminance); } if (keyFile.has_group("Film Simulation")) { diff --git a/rtengine/procparams.h b/rtengine/procparams.h index 82dfe9697..4d8f66ebd 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -1353,6 +1353,7 @@ struct DehazeParams { int strength; bool showDepthMap; int depth; + bool luminance; DehazeParams(); diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 3000c1c10..cce88df5d 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1390,6 +1390,18 @@ static inline float vhadd( vfloat a ) { return _mm_cvtss_f32(_mm_add_ss(a, _mm_shuffle_ps(a, a, 1))); } +static inline float vhmin(vfloat a) { + // returns min(a[0], a[1], a[2], a[3]) + a = vminf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vminf(a, _mm_shuffle_ps(a, a, 1))); +} + +static inline float vhmax(vfloat a) { + // returns max(a[0], a[1], a[2], a[3]) + a = vmaxf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vmaxf(a, _mm_shuffle_ps(a, a, 1))); +} + static INLINE vfloat vmul2f(vfloat a){ // fastest way to multiply by 2 return a + a; diff --git a/rtgui/dehaze.cc b/rtgui/dehaze.cc index 6f60d08d6..6b7fcd64f 100644 --- a/rtgui/dehaze.cc +++ b/rtgui/dehaze.cc @@ -36,6 +36,7 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false, EvDehazeStrength = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_STRENGTH"); EvDehazeShowDepthMap = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP"); EvDehazeDepth = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_DEPTH"); + EvDehazeLuminance = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_LUMINANCE"); strength = Gtk::manage(new Adjuster(M("TP_DEHAZE_STRENGTH"), 0., 100., 1., 50.)); strength->setAdjusterListener(this); @@ -45,12 +46,17 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false, depth->setAdjusterListener(this); depth->show(); + luminance = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_LUMINANCE"))); + luminance->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::luminanceChanged)); + luminance->show(); + showDepthMap = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_SHOW_DEPTH_MAP"))); showDepthMap->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::showDepthMapChanged)); showDepthMap->show(); pack_start(*strength); pack_start(*depth); + pack_start(*luminance); pack_start(*showDepthMap); } @@ -64,12 +70,14 @@ void Dehaze::read(const ProcParams *pp, const ParamsEdited *pedited) depth->setEditedState(pedited->dehaze.depth ? Edited : UnEdited); set_inconsistent(multiImage && !pedited->dehaze.enabled); showDepthMap->set_inconsistent(!pedited->dehaze.showDepthMap); + luminance->set_inconsistent(!pedited->dehaze.luminance); } setEnabled(pp->dehaze.enabled); strength->setValue(pp->dehaze.strength); depth->setValue(pp->dehaze.depth); showDepthMap->set_active(pp->dehaze.showDepthMap); + luminance->set_active(pp->dehaze.luminance); enableListener(); } @@ -81,12 +89,14 @@ void Dehaze::write(ProcParams *pp, ParamsEdited *pedited) pp->dehaze.depth = depth->getValue(); pp->dehaze.enabled = getEnabled(); pp->dehaze.showDepthMap = showDepthMap->get_active(); + pp->dehaze.luminance = luminance->get_active(); if (pedited) { pedited->dehaze.strength = strength->getEditedState(); pedited->dehaze.depth = depth->getEditedState(); pedited->dehaze.enabled = !get_inconsistent(); pedited->dehaze.showDepthMap = !showDepthMap->get_inconsistent(); + pedited->dehaze.luminance = !luminance->get_inconsistent(); } } @@ -138,6 +148,12 @@ void Dehaze::showDepthMapChanged() } } +void Dehaze::luminanceChanged() +{ + if (listener) { + listener->panelChanged(EvDehazeLuminance, luminance->get_active() ? M("GENERAL_ENABLED") : M("GENERAL_DISABLED")); + } +} void Dehaze::setBatchMode(bool batchMode) { diff --git a/rtgui/dehaze.h b/rtgui/dehaze.h index 3120dfc91..6a9d31cd1 100644 --- a/rtgui/dehaze.h +++ b/rtgui/dehaze.h @@ -28,12 +28,14 @@ class Dehaze: public ToolParamBlock, public AdjusterListener, public FoldableToo private: Adjuster *strength; Adjuster *depth; - Gtk::CheckButton *showDepthMap; + Gtk::CheckButton *showDepthMap; + Gtk::CheckButton *luminance; rtengine::ProcEvent EvDehazeEnabled; rtengine::ProcEvent EvDehazeStrength; rtengine::ProcEvent EvDehazeDepth; rtengine::ProcEvent EvDehazeShowDepthMap; + rtengine::ProcEvent EvDehazeLuminance; public: @@ -47,6 +49,7 @@ public: void adjusterChanged(Adjuster *a, double newval) override; void enabledChanged() override; void showDepthMapChanged(); + void luminanceChanged(); void setAdjusterBehavior(bool strengthAdd); }; diff --git a/rtgui/extprog.cc b/rtgui/extprog.cc index a6a9050c0..95c1c937d 100644 --- a/rtgui/extprog.cc +++ b/rtgui/extprog.cc @@ -58,7 +58,7 @@ bool ExtProgAction::execute (const std::vector& fileNames) const } for (const auto& fileName : fileNames) { - cmdLine += " \"" + fileName + "\""; + cmdLine += " " + Glib::shell_quote(fileName); } return ExtProgStore::spawnCommandAsync (cmdLine); @@ -256,7 +256,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName) #else - auto cmdLine = Glib::ustring("gimp \"") + fileName + Glib::ustring("\""); + auto cmdLine = Glib::ustring("gimp ") + Glib::shell_quote(fileName); auto success = spawnCommandAsync (cmdLine); #endif @@ -291,7 +291,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName) #else - cmdLine = Glib::ustring("gimp-remote \"") + fileName + Glib::ustring("\""); + cmdLine = Glib::ustring("gimp-remote ") + Glib::shell_quote(fileName); success = ExtProgStore::spawnCommandAsync (cmdLine); #endif @@ -312,7 +312,7 @@ bool ExtProgStore::openInPhotoshop (const Glib::ustring& fileName) #else - const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + Glib::ustring("\" \"") + fileName + Glib::ustring("\""); + const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + "\" " + Glib::shell_quote(fileName); #endif @@ -334,7 +334,7 @@ bool ExtProgStore::openInCustomEditor (const Glib::ustring& fileName) #else - const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + Glib::ustring("\" \"") + fileName + Glib::ustring("\""); + const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + "\" " + Glib::shell_quote(fileName); return spawnCommandAsync (cmdLine); #endif diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index 1cdcacf13..f6561077c 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -598,6 +598,7 @@ void ParamsEdited::set(bool v) dehaze.strength = v; dehaze.showDepthMap = v; dehaze.depth = v; + dehaze.luminance = v; metadata.mode = v; filmNegative.enabled = v; filmNegative.redRatio = v; @@ -1180,6 +1181,7 @@ void ParamsEdited::initFrom(const std::vector& dehaze.strength = dehaze.strength && p.dehaze.strength == other.dehaze.strength; dehaze.showDepthMap = dehaze.showDepthMap && p.dehaze.showDepthMap == other.dehaze.showDepthMap; dehaze.depth = dehaze.depth && p.dehaze.depth == other.dehaze.depth; + dehaze.luminance = dehaze.luminance && p.dehaze.luminance == other.dehaze.luminance; metadata.mode = metadata.mode && p.metadata.mode == other.metadata.mode; filmNegative.enabled = filmNegative.enabled && p.filmNegative.enabled == other.filmNegative.enabled; filmNegative.redRatio = filmNegative.redRatio && p.filmNegative.redRatio == other.filmNegative.redRatio; @@ -3290,6 +3292,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng toEdit.dehaze.showDepthMap = mods.dehaze.showDepthMap; } + if (dehaze.luminance) { + toEdit.dehaze.luminance = mods.dehaze.luminance; + } + if (metadata.mode) { toEdit.metadata.mode = mods.metadata.mode; } diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h index 0f4ad85ea..fc3cd4b7a 100644 --- a/rtgui/paramsedited.h +++ b/rtgui/paramsedited.h @@ -607,6 +607,7 @@ struct DehazeParamsEdited { bool strength; bool showDepthMap; bool depth; + bool luminance; }; struct RAWParamsEdited { diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc index f25e44e69..d0ccc43a8 100644 --- a/rtgui/pdsharpening.cc +++ b/rtgui/pdsharpening.cc @@ -18,22 +18,28 @@ */ #include -#include "eventmapper.h" +#include + #include "pdsharpening.h" + +#include "eventmapper.h" #include "options.h" + #include "../rtengine/procparams.h" using namespace rtengine; using namespace rtengine::procparams; -PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDSHARPENING_LABEL"), false, true) +PdSharpening::PdSharpening() : + FoldableToolPanel(this, "capturesharpening", M("TP_PDSHARPENING_LABEL"), false, true), + lastAutoContrast(true), + lastAutoRadius(true) { - auto m = ProcEventMapper::getInstance(); EvPdShrContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_CONTRAST"); EvPdSharpenGamma = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_GAMMA"); EvPdShrDRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS"); - EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_OFFSET"); + EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_BOOST"); EvPdShrDIterations = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_ITERATIONS"); EvPdShrAutoContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_CONTRAST"); EvPdShrAutoRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_RADIUS"); @@ -42,7 +48,7 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS hb->show(); contrast = Gtk::manage(new Adjuster(M("TP_SHARPENING_CONTRAST"), 0, 200, 1, 10)); contrast->setAdjusterListener(this); - contrast->addAutoButton(M("TP_RAW_DUALDEMOSAICAUTOCONTRAST_TOOLTIP")); + contrast->addAutoButton(); contrast->setAutoValue(true); pack_start(*contrast); @@ -53,9 +59,9 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS Gtk::VBox* rld = Gtk::manage(new Gtk::VBox()); gamma = Gtk::manage(new Adjuster(M("TP_SHARPENING_GAMMA"), 0.5, 6.0, 0.05, 1.00)); dradius = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS"), 0.4, 1.15, 0.01, 0.75)); - dradius->addAutoButton(M("TP_PDSHARPENING_AUTORADIUS_TOOLTIP")); + dradius->addAutoButton(); dradius->setAutoValue(true); - dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_OFFSET"), 0.0, 0.5, 0.01, 0.0)); + dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_BOOST"), -0.5, 0.5, 0.01, 0.0)); diter = Gtk::manage(new Adjuster(M("TP_SHARPENING_RLD_ITERATIONS"), 1, 100, 1, 20)); rld->pack_start(*gamma); rld->pack_start(*dradius);