From 0c1c2152622fbbc09f3fac99cf4687fdf77b0fff Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 15 Sep 2019 21:57:17 +0200 Subject: [PATCH 01/31] Improve readability of hasselblad_load_raw() code, #5434 --- rtengine/dcraw.cc | 121 +++++++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 51 deletions(-) diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index e15a2bb0f..5e85b1e2e 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -2417,59 +2417,78 @@ void CLASS hasselblad_correct() void CLASS hasselblad_load_raw() { - struct jhead jh; - int shot, row, col, *back[5], len[2], diff[12], pred, sh, f, s, c; - unsigned upix, urow, ucol; - ushort *ip; + struct jhead jh; + int *back[5], diff[12]; - if (!ljpeg_start (&jh, 0)) return; - order = 0x4949; - ph1_bithuff_t ph1_bithuff(this, ifp, order); - hb_bits(-1); - back[4] = (int *) calloc (raw_width, 3*sizeof **back); - merror (back[4], "hasselblad_load_raw()"); - FORC3 back[c] = back[4] + c*raw_width; - cblack[6] >>= sh = tiff_samples > 1; - shot = LIM(shot_select, 1, tiff_samples) - 1; - for (row=0; row < raw_height; row++) { - FORC4 back[(c+3) & 3] = back[c]; - for (col=0; col < raw_width; col+=2) { - for (s=0; s < tiff_samples*2; s+=2) { - FORC(2) len[c] = ph1_huff(jh.huff[0]); - FORC(2) { - diff[s+c] = hb_bits(len[c]); - if ((diff[s+c] & (1 << (len[c]-1))) == 0) - diff[s+c] -= (1 << len[c]) - 1; - if (diff[s+c] == 65535) diff[s+c] = -32768; - } - } - for (s=col; s < col+2; s++) { - pred = 0x8000 + load_flags; - if (col) pred = back[2][s-2]; - if (col && row > 1) switch (jh.psv) { - case 11: pred += back[0][s]/2 - back[0][s-2]/2; break; - } - f = (row & 1)*3 ^ ((col+s) & 1); - FORC (tiff_samples) { - pred += diff[(s & 1)*tiff_samples+c]; - upix = pred >> sh & 0xffff; - if (raw_image && c == shot) - RAW(row,s) = upix; - if (image) { - urow = row-top_margin + (c & 1); - ucol = col-left_margin - ((c >> 1) & 1); - ip = &image[urow*width+ucol][f]; - if (urow < height && ucol < width) - *ip = c < 4 ? upix : (*ip + upix) >> 1; - } - } - back[2][s] = pred; - } + if (!ljpeg_start (&jh, 0)) { + return; + } + order = 0x4949; + ph1_bithuff_t ph1_bithuff(this, ifp, order); + hb_bits(-1); + back[4] = (int *) calloc(raw_width, 3 * sizeof **back); + merror(back[4], "hasselblad_load_raw()"); + for (int c = 0; c < 3; ++c) { + back[c] = back[4] + c * raw_width; + } + const int sh = tiff_samples > 1; + cblack[6] >>= sh; + const int shot = LIM(shot_select, 1, tiff_samples) - 1; + for (int row = 0; row < raw_height; ++row) { + for (int c = 0; c < 4; ++c) { + back[(c + 3) & 3] = back[c]; + } + for (int col = 0; col < raw_width; col += 2) { + for (int s = 0; s < tiff_samples * 2; s += 2) { + int len[2]; + for (int c = 0; c < 2; ++c) { + len[c] = ph1_huff(jh.huff[0]); + } + for (int c = 0; c < 2; ++c) { + diff[s + c] = hb_bits(len[c]); + if ((diff[s + c] & (1 << (len[c] - 1))) == 0) { + diff[s + c] -= (1 << len[c]) - 1; + } + if (diff[s + c] == 65535) { + diff[s + c] = -32768; + } + } + } + for (int s = col; s < col + 2; ++s) { + int pred; + if (col) { + pred = back[2][s - 2]; + if (row > 1 && jh.psv == 11) { + pred += back[0][s] / 2 - back[0][s - 2] / 2; + } + } else { + pred = 0x8000 + load_flags; + } + for (int c = 0; c < tiff_samples; ++c) { + pred += diff[(s & 1) * tiff_samples + c]; + const unsigned upix = pred >> sh & 0xffff; + if (raw_image && c == shot) { + RAW(row, s) = upix; + } + if (image) { + const int f = (row & 1) * 3 ^ ((col + s) & 1); + const unsigned urow = row - top_margin + (c & 1); + const unsigned ucol = col - left_margin - ((c >> 1) & 1); + ushort* const ip = &image[urow * width + ucol][f]; + if (urow < height && ucol < width) { + *ip = c < 4 ? upix : (*ip + upix) >> 1; + } + } + } + back[2][s] = pred; + } + } + } + free(back[4]); + ljpeg_end(&jh); + if (image) { + mix_green = 1; } - } - free (back[4]); - ljpeg_end (&jh); - if (image) mix_green = 1; } void CLASS leaf_hdr_load_raw() From a0c6c1569c1f7574952341e4be4a8a0859ba811a Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 16 Sep 2019 21:43:03 +0200 Subject: [PATCH 02/31] Fix indentations --- rtengine/dcraw.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index 5e85b1e2e..5275c42c1 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -2441,9 +2441,9 @@ void CLASS hasselblad_load_raw() for (int col = 0; col < raw_width; col += 2) { for (int s = 0; s < tiff_samples * 2; s += 2) { int len[2]; - for (int c = 0; c < 2; ++c) { - len[c] = ph1_huff(jh.huff[0]); - } + for (int c = 0; c < 2; ++c) { + len[c] = ph1_huff(jh.huff[0]); + } for (int c = 0; c < 2; ++c) { diff[s + c] = hb_bits(len[c]); if ((diff[s + c] & (1 << (len[c] - 1))) == 0) { @@ -2455,15 +2455,15 @@ void CLASS hasselblad_load_raw() } } for (int s = col; s < col + 2; ++s) { - int pred; - if (col) { + int pred; + if (col) { pred = back[2][s - 2]; if (row > 1 && jh.psv == 11) { pred += back[0][s] / 2 - back[0][s - 2] / 2; } - } else { - pred = 0x8000 + load_flags; - } + } else { + pred = 0x8000 + load_flags; + } for (int c = 0; c < tiff_samples; ++c) { pred += diff[(s & 1) * tiff_samples + c]; const unsigned upix = pred >> sh & 0xffff; From 991fc94d89e08ce625256ca4a146d7151ae5c5fd Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 17 Sep 2019 15:11:12 +0200 Subject: [PATCH 03/31] Speedup for guided filter --- rtengine/guidedfilter.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index 0ebe6c172..bc7f64f05 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -207,9 +207,6 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 apply(SUBMUL, b, a, meanI, meanp); DEBUG_DUMP(b); - meanI.free(); // frees w * h * 4 byte - meanp.free(); // frees w * h * 4 byte - array2D &meana = a; f_mean(meana, a, r1); DEBUG_DUMP(meana); @@ -218,18 +215,25 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 f_mean(meanb, b, r1); DEBUG_DUMP(meanb); - blur_buf.resize(0); // frees w * h * 4 byte + const int Ws = meana.width(); + const int Hs = meana.height(); + const int Wd = q.width(); + const int Hd = q.height(); - array2D meanA(W, H); - f_upsample(meanA, meana); - DEBUG_DUMP(meanA); + float col_scale = float (Ws) / float (Wd); + float row_scale = float (Hs) / float (Hd); - array2D &meanB = q; - f_upsample(meanB, meanb); - DEBUG_DUMP(meanB); +#ifdef _OPENMP + #pragma omp parallel for if (multithread) +#endif - apply(ADDMUL, q, meanA, I, meanB); - DEBUG_DUMP(q); + for (int y = 0; y < Hd; ++y) { + float ymrs = y * row_scale; + + for (int x = 0; x < Wd; ++x) { + q[y][x] = getBilinearValue(meana, x * col_scale, ymrs) * I[y][x] + getBilinearValue(meanb, x * col_scale, ymrs); + } + } } } // namespace rtengine From 3ab379ad0a3871f4cd8b1ce933dfa7a8d37b001b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 19 Sep 2019 20:56:33 +0200 Subject: [PATCH 04/31] Dehaze: further speedup, #5456 --- rtengine/boxblur.h | 255 +++++++++++++++++++++++++++++++++++--- rtengine/guidedfilter.cc | 187 ++++++++++------------------ rtengine/ipdehaze.cc | 260 +++++++++++++++++++++------------------ 3 files changed, 442 insertions(+), 260 deletions(-) diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index da302964b..3020278b2 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -204,15 +204,15 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, tempv = tempv / lenv; temp1v = temp1v / lenv; - STVFU( dst[0][col], tempv); - STVFU( dst[0][col + 4], temp1v); + STVFU(dst[0][col], tempv); + STVFU(dst[0][col + 4], temp1v); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); lenv = lenp1v; } @@ -221,16 +221,16 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU( dst[row][col], tempv); - STVFU( dst[row][col + 4], temp1v); + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); lenv = lenm1v; } } @@ -312,6 +312,221 @@ template void boxblur (T** src, A** dst, T* buffer, int radx, } +inline void boxblur (float** src, float** dst, int radius, int W, int H, bool multiThread) +{ + //box blur using rowbuffers and linebuffers instead of a full size buffer + + if (radius == 0) { + if (src != dst) { +#ifdef _OPENMP + #pragma omp parallel for if (multiThread) +#endif + + for (int row = 0; row < H; row++) { + for (int col = 0; col < W; col++) { + dst[row][col] = src[row][col]; + } + } + } + return; + } + + constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache +#ifdef _OPENMP + #pragma omp parallel if (multiThread) +#endif + { + float* const buffer = new float[std::max(W, 8 * H)]; + //horizontal blur + float* const lineBuffer = buffer; +#ifdef _OPENMP + #pragma omp for +#endif + for (int row = 0; row < H; row++) { + float len = radius + 1; + float tempval = src[row][0]; + lineBuffer[0] = tempval; + for (int j = 1; j <= radius; j++) { + tempval += src[row][j]; + } + + tempval /= len; + dst[row][0] = tempval; + + for (int col = 1; col <= radius; col++) { + lineBuffer[col] = src[row][col]; + dst[row][col] = tempval = (tempval * len + src[row][col + radius]) / (len + 1); + len ++; + } + + for (int col = radius + 1; col < W - radius; col++) { + lineBuffer[col] = src[row][col]; + dst[row][col] = tempval = tempval + (src[row][col + radius] - lineBuffer[col - radius - 1]) / len; + } + + for (int col = W - radius; col < W; col++) { + dst[row][col] = tempval = (tempval * len - lineBuffer[col - radius - 1]) / (len - 1); + len --; + } + } + + //vertical blur +#ifdef __SSE2__ + vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer; + vfloat leninitv = F2V(radius + 1); + vfloat onev = F2V(1.f); + vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; + +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - 7; col += 8) { + lenv = leninitv; + tempv = LVFU(dst[0][col]); + temp1v = LVFU(dst[0][col + 4]); + rowBuffer[0][0] = tempv; + rowBuffer[0][1] = temp1v; + + for (int i = 1; i <= radius; i++) { + tempv = tempv + LVFU(dst[i][col]); + temp1v = temp1v + LVFU(dst[i][col + 4]); + } + + tempv = tempv / lenv; + temp1v = temp1v / lenv; + STVFU(dst[0][col], tempv); + STVFU(dst[0][col + 4], temp1v); + + for (int row = 1; row <= radius; row++) { + rowBuffer[row][0] = LVFU(dst[row][col]); + rowBuffer[row][1] = LVFU(dst[row][col + 4]); + lenp1v = lenv + onev; + tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v; + temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenp1v; + } + + rlenv = onev / lenv; + + for (int row = radius + 1; row < H - radius; row++) { + rowBuffer[row][0] = LVFU(dst[row][col]); + rowBuffer[row][1] = LVFU(dst[row][col + 4]); + tempv = tempv + (LVFU(dst[row + radius][col]) - rowBuffer[row - radius - 1][0]) * rlenv ; + temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - rowBuffer[row - radius - 1][1]) * rlenv ; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + } + + for (int row = H - radius; row < H; row++) { + lenm1v = lenv - onev; + tempv = (tempv * lenv - rowBuffer[row - radius - 1][0]) / lenm1v; + temp1v = (temp1v * lenv - rowBuffer[row - radius - 1][1]) / lenm1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenm1v; + } + } + +#else + float (* const rowBuffer)[8] = (float(*)[8]) buffer; +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - numCols + 1; col += 8) { + float len = radius + 1; + + for(int k = 0; k < numCols; k++) { + rowBuffer[0][k] = dst[0][col + k]; + } + + for (int i = 1; i <= radius; i++) { + for(int k = 0; k < numCols; k++) { + dst[0][col + k] += dst[i][col + k]; + } + } + + for(int k = 0; k < numCols; k++) { + dst[0][col + k] /= len; + } + + for (int row = 1; row <= radius; row++) { + for(int k = 0; k < numCols; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + + len ++; + } + + for (int row = radius + 1; row < H - radius; row++) { + for(int k = 0; k < numCols; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) / len; + } + } + + for (int row = H - radius; row < H; row++) { + for(int k = 0; k < numCols; k++) { + dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1); + } + + len --; + } + } + +#endif + //vertical blur, remaining columns +#ifdef _OPENMP + #pragma omp single +#endif + { + const int remaining = W % numCols; + if (remaining > 0) { + float (* const rowBuffer)[8] = (float(*)[8]) buffer; + const int col = W - remaining; + + float len = radius + 1; + for(int k = 0; k < remaining; k++) { + rowBuffer[0][k] = dst[0][col + k]; + } + for (int i = 1; i <= radius; i++) { + for(int k = 0; k < remaining; k++) { + dst[0][col + k] += dst[i][col + k]; + } + } + for(int k = 0; k < remaining; k++) { + dst[0][col + k] /= len; + } + for (int row = 1; row <= radius; row++) { + for(int k = 0; k < remaining; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[(row - 1)][col + k] * len + dst[row + radius][col + k]) / (len + 1); + len ++; + } + } + const float rlen = 1.f / len; + for (int row = radius + 1; row < H - radius; row++) { + for(int k = 0; k < remaining; k++) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = dst[(row - 1)][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) * rlen; + } + } + for (int row = H - radius; row < H; row++) { + for(int k = 0; k < remaining; k++) { + dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1); + len --; + } + } + } + } + delete [] buffer; + } +} + template void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H) { //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) @@ -382,15 +597,15 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in tempv = tempv / lenv; temp1v = temp1v / lenv; - STVFU( dst[0 * W + col], tempv); - STVFU( dst[0 * W + col + 4], temp1v); + STVFU(dst[0 * W + col], tempv); + STVFU(dst[0 * W + col + 4], temp1v); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); lenv = lenp1v; } @@ -399,16 +614,16 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU( dst[row * W + col], tempv); - STVFU( dst[row * W + col + 4], temp1v); + STVFU(dst[row * W + col], tempv); + STVFU(dst[row * W + col + 4], temp1v); lenv = lenm1v; } } @@ -422,12 +637,12 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in } tempv = tempv / lenv; - STVFU( dst[0 * W + col], tempv); + STVFU(dst[0 * W + col], tempv); for (int row = 1; row <= rady; row++) { lenp1v = lenv + onev; tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); lenv = lenp1v; } @@ -435,13 +650,13 @@ template void boxblur (T* src, A* dst, A* buffer, int radx, in for (int row = rady + 1; row < H - rady; row++) { tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); } for (int row = H - rady; row < H; row++) { lenm1v = lenv - onev; tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; - STVFU( dst[row * W + col], tempv); + STVFU(dst[row * W + col], tempv); lenv = lenm1v; } } diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index bc7f64f05..8d19fc7a5 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -3,6 +3,7 @@ * This file is part of RawTherapee. * * Copyright (c) 2018 Alberto Griggio + * Optimized 2019 Ingo Weyrich * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,9 +17,9 @@ * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . - */ +*/ -/** +/* * This is a Fast Guided Filter implementation, derived directly from the * pseudo-code of the paper: * @@ -26,32 +27,16 @@ * by Kaiming He, Jian Sun * * available at https://arxiv.org/abs/1505.00996 - */ +*/ #include "guidedfilter.h" #include "boxblur.h" #include "rescale.h" #include "imagefloat.h" - +#define BENCHMARK +#include "StopWatch.h" namespace rtengine { -#if 0 -# define DEBUG_DUMP(arr) \ - do { \ - Imagefloat im(arr.width(), arr.height()); \ - const char *out = "/tmp/" #arr ".tif"; \ - for (int y = 0; y < im.getHeight(); ++y) { \ - for (int x = 0; x < im.getWidth(); ++x) { \ - im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \ - } \ - } \ - im.saveTIFF(out, 16); \ - } while (false) -#else -# define DEBUG_DUMP(arr) -#endif - - namespace { int calculate_subsampling(int w, int h, int r) @@ -78,15 +63,7 @@ int calculate_subsampling(int w, int h, int r) void guidedFilter(const array2D &guide, const array2D &src, array2D &dst, int r, float epsilon, bool multithread, int subsampling) { - - const int W = src.width(); - const int H = src.height(); - - if (subsampling <= 0) { - subsampling = calculate_subsampling(W, H, r); - } - - enum Op { MUL, DIVEPSILON, ADD, SUB, ADDMUL, SUBMUL }; + enum Op {MUL, DIVEPSILON, SUBMUL}; const auto apply = [=](Op op, array2D &res, const array2D &a, const array2D &b, const array2D &c=array2D()) -> void @@ -99,139 +76,107 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 #endif for (int y = 0; y < h; ++y) { for (int x = 0; x < w; ++x) { - float r; - float aa = a[y][x]; - float bb = b[y][x]; switch (op) { - case MUL: - r = aa * bb; - break; - case DIVEPSILON: - r = aa / (bb + epsilon); - break; - case ADD: - r = aa + bb; - break; - case SUB: - r = aa - bb; - break; - case ADDMUL: - r = aa * bb + c[y][x]; - break; - case SUBMUL: - r = c[y][x] - (aa * bb); - break; - default: - assert(false); - r = 0; - break; + case MUL: + res[y][x] = a[y][x] * b[y][x]; + break; + case DIVEPSILON: + res[y][x] = a[y][x] / (b[y][x] + epsilon); // note: the value of epsilon intentionally has an impact on the result. It is not only to avoid divisions by zero + break; + case SUBMUL: + res[y][x] = c[y][x] - (a[y][x] * b[y][x]); + break; + default: + assert(false); + res[y][x] = 0; + break; } - res[y][x] = r; } } }; - // use the terminology of the paper (Algorithm 2) - const array2D &I = guide; - const array2D &p = src; - array2D &q = dst; - const auto f_subsample = [=](array2D &d, const array2D &s) -> void { rescaleBilinear(s, d, multithread); }; - const auto f_upsample = f_subsample; - - const size_t w = W / subsampling; - const size_t h = H / subsampling; - - AlignedBuffer blur_buf(w * h); const auto f_mean = [&](array2D &d, array2D &s, int rad) -> void { rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1); - float **src = s; - float **dst = d; -#ifdef _OPENMP - #pragma omp parallel if (multithread) -#endif - boxblur(src, dst, blur_buf.data, rad, rad, s.width(), s.height()); + boxblur(s, d, rad, s.width(), s.height(), multithread); }; + const int W = src.width(); + const int H = src.height(); + + if (subsampling <= 0) { + subsampling = calculate_subsampling(W, H, r); + } + + const size_t w = W / subsampling; + const size_t h = H / subsampling; + const float r1 = float(r) / subsampling; + array2D I1(w, h); array2D p1(w, h); - f_subsample(I1, I); - f_subsample(p1, p); + f_subsample(I1, guide); - DEBUG_DUMP(I); - DEBUG_DUMP(p); - DEBUG_DUMP(I1); - DEBUG_DUMP(p1); + if (&guide == &src) { + f_mean(p1, I1, r1); - float r1 = float(r) / subsampling; + apply(MUL, I1, I1, I1); // I1 = I1 * I1 - array2D meanI(w, h); - f_mean(meanI, I1, r1); - DEBUG_DUMP(meanI); + f_mean(I1, I1, r1); - array2D meanp(w, h); - f_mean(meanp, p1, r1); - DEBUG_DUMP(meanp); + apply(SUBMUL, I1, p1, p1, I1); // I1 = I1 - p1 * p1 + apply(DIVEPSILON, I1, I1, I1); // I1 = I1 / (I1 + epsilon) + apply(SUBMUL, p1, I1, p1, p1); // p1 = p1 - I1 * p1 - array2D &corrIp = p1; - apply(MUL, corrIp, I1, p1); - f_mean(corrIp, corrIp, r1); - DEBUG_DUMP(corrIp); + } else { + f_subsample(p1, src); - array2D &corrI = I1; - apply(MUL, corrI, I1, I1); - f_mean(corrI, corrI, r1); - DEBUG_DUMP(corrI); + array2D meanI(w, h); + f_mean(meanI, I1, r1); - array2D &varI = corrI; - apply(SUBMUL, varI, meanI, meanI, corrI); - DEBUG_DUMP(varI); + array2D meanp(w, h); + f_mean(meanp, p1, r1); - array2D &covIp = corrIp; - apply(SUBMUL, covIp, meanI, meanp, corrIp); - DEBUG_DUMP(covIp); + apply(MUL, p1, I1, p1); - array2D &a = varI; - apply(DIVEPSILON, a, covIp, varI); - DEBUG_DUMP(a); + f_mean(p1, p1, r1); - array2D &b = covIp; - apply(SUBMUL, b, a, meanI, meanp); - DEBUG_DUMP(b); + apply(MUL, I1, I1, I1); - array2D &meana = a; - f_mean(meana, a, r1); - DEBUG_DUMP(meana); + f_mean(I1, I1, r1); - array2D &meanb = b; - f_mean(meanb, b, r1); - DEBUG_DUMP(meanb); + apply(SUBMUL, I1, meanI, meanI, I1); + apply(SUBMUL, p1, meanI, meanp, p1); + apply(DIVEPSILON, I1, p1, I1); + apply(SUBMUL, p1, I1, meanI, meanp); + } - const int Ws = meana.width(); - const int Hs = meana.height(); - const int Wd = q.width(); - const int Hd = q.height(); + f_mean(I1, I1, r1); + f_mean(p1, p1, r1); - float col_scale = float (Ws) / float (Wd); - float row_scale = float (Hs) / float (Hd); + const int Ws = I1.width(); + const int Hs = I1.height(); + const int Wd = dst.width(); + const int Hd = dst.height(); + + const float col_scale = static_cast(Ws) / static_cast(Wd); + const float row_scale = static_cast(Hs) / static_cast(Hd); #ifdef _OPENMP #pragma omp parallel for if (multithread) #endif for (int y = 0; y < Hd; ++y) { - float ymrs = y * row_scale; - + const float ymrs = y * row_scale; for (int x = 0; x < Wd; ++x) { - q[y][x] = getBilinearValue(meana, x * col_scale, ymrs) * I[y][x] + getBilinearValue(meanb, x * col_scale, ymrs); + dst[y][x] = getBilinearValue(I1, x * col_scale, ymrs) * guide[y][x] + getBilinearValue(p1, x * col_scale, ymrs); } } } diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 60d4cb9ff..68af84970 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -35,7 +35,10 @@ #include "improcfun.h" #include "procparams.h" #include "rt_algo.h" +#include "rt_algo.h" #include "rt_math.h" +#define BENCHMARK +#include "StopWatch.h" extern Options options; @@ -43,24 +46,7 @@ namespace rtengine { namespace { -#if 0 -# define DEBUG_DUMP(arr) \ - do { \ - Imagefloat im(arr.width(), arr.height()); \ - const char *out = "/tmp/" #arr ".tif"; \ - for (int y = 0; y < im.getHeight(); ++y) { \ - for (int x = 0; x < im.getWidth(); ++x) { \ - im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \ - } \ - } \ - im.saveTIFF(out, 16); \ - } while (false) -#else -# define DEBUG_DUMP(arr) -#endif - - -int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread) +int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength) { const int W = R.width(); const int H = R.height(); @@ -73,22 +59,12 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr for (int x = 0; x < W; x += patchsize) { float val = RT_INFINITY_F; const int pW = min(x + patchsize, W); - for (int yy = y; yy < pH; ++yy) { - for (int xx = x; xx < pW; ++xx) { - float r = R[yy][xx]; - float g = G[yy][xx]; - float b = B[yy][xx]; - if (ambient) { - r /= ambient[0]; - g /= ambient[1]; - b /= ambient[2]; - } - val = min(val, r, g, b); + for (int xx = x; xx < pW; ++xx) { + for (int yy = y; yy < pH; ++yy) { + val = min(val, R[yy][xx] / ambient[0], G[yy][xx] / ambient[1], B[yy][xx] / ambient[2]); } } - if (clip) { - val = LIM01(val); - } + val = 1.f - strength * LIM01(val); for (int yy = y; yy < pH; ++yy) { std::fill(dst[yy] + x, dst[yy] + pW, val); } @@ -98,41 +74,59 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); } +int get_dark_channel_downsized(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, bool multithread) +{ + const int W = R.width(); + const int H = R.height(); + +#ifdef _OPENMP + #pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < H; y += patchsize) { + int yy = y / patchsize; + const int pH = min(y + patchsize, H); + for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { + float val = RT_INFINITY_F; + const int pW = min(x + patchsize, W); + for (int xp = x; xp < pW; ++xp) { + for (int yp = y; yp < pH; ++yp) { + val = min(val, R[yp][xp], G[yp][xp], B[yp][xp]); + } + } + dst[yy][xx] = val; + } + } + + return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); +} + float estimate_ambient_light(const array2D &R, const array2D &G, const array2D &B, const array2D &dark, int patchsize, int npatches, float ambient[3]) { const int W = R.width(); const int H = R.height(); - const auto get_percentile = - [](std::priority_queue &q, float prcnt) -> float - { - size_t n = LIM(q.size() * prcnt, 1, q.size()); - while (q.size() > n) { - q.pop(); - } - return q.top(); - }; - float darklim = RT_INFINITY_F; { - std::priority_queue p; - for (int y = 0; y < H; y += patchsize) { - for (int x = 0; x < W; x += patchsize) { - if (!OOG(dark[y][x], 1.f - 1e-5f)) { - p.push(dark[y][x]); + std::vector p; + for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) { + for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { + if (!OOG(dark[yy][xx], 1.f - 1e-5f)) { + p.push_back(dark[yy][xx]); } } } - darklim = get_percentile(p, 0.95); + const int pos = p.size() * 0.95; + std::nth_element(p.begin(), p.begin() + pos, p.end()); + darklim = p[pos]; } std::vector> patches; patches.reserve(npatches); - for (int y = 0; y < H; y += patchsize) { - for (int x = 0; x < W; x += patchsize) { - if (dark[y][x] >= darklim && !OOG(dark[y][x], 1.f)) { + for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) { + for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { + if (dark[yy][xx] >= darklim && !OOG(dark[yy][xx], 1.f)) { patches.push_back(std::make_pair(x, y)); } } @@ -145,33 +139,38 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c float bright_lim = RT_INFINITY_F; { - std::priority_queue l; + std::vector l; + l.reserve(patches.size() * patchsize * patchsize); - for (auto &p : patches) { - const int pW = min(p.first+patchsize, W); - const int pH = min(p.second+patchsize, H); + for (const auto &p : patches) { + const int pW = min(p.first + patchsize, W); + const int pH = min(p.second + patchsize, H); for (int y = p.second; y < pH; ++y) { for (int x = p.first; x < pW; ++x) { - l.push(R[y][x] + G[y][x] + B[y][x]); + l.push_back(R[y][x] + G[y][x] + B[y][x]); } } } - - bright_lim = get_percentile(l, 0.95); + const int pos = l.size() * 0.95; + std::nth_element(l.begin(), l.begin() + pos, l.end()); + bright_lim = l[pos]; } double rr = 0, gg = 0, bb = 0; int n = 0; - for (auto &p : patches) { - const int pW = min(p.first+patchsize, W); - const int pH = min(p.second+patchsize, H); +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) reduction(+:rr,gg,bb,n) +#endif + for (const auto &p : patches) { + const int pW = min(p.first + patchsize, W); + const int pH = min(p.second + patchsize, H); for (int y = p.second; y < pH; ++y) { for (int x = p.first; x < pW; ++x) { - float r = R[y][x]; - float g = G[y][x]; - float b = B[y][x]; + const float r = R[y][x]; + const float g = G[y][x]; + const float b = B[y][x]; if (r + g + b >= bright_lim) { rr += r; gg += g; @@ -181,6 +180,7 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c } } } + n = std::max(n, 1); ambient[0] = rr / n; ambient[1] = gg / n; @@ -211,12 +211,12 @@ void extract_channels(Imagefloat *img, array2D &r, array2D &g, arr void ImProcFunctions::dehaze(Imagefloat *img) { - if (!params->dehaze.enabled) { + if (!params->dehaze.enabled || params->dehaze.strength == 0.0) { return; } - +BENCHFUN img->normalizeFloatTo1(); - + const int W = img->getWidth(); const int H = img->getHeight(); const float strength = LIM01(float(params->dehaze.strength) / 100.f * 0.9f); @@ -229,21 +229,19 @@ void ImProcFunctions::dehaze(Imagefloat *img) int patchsize = max(int(5 / scale), 2); float ambient[3]; - array2D &t_tilde = dark; float max_t = 0.f; { - int npatches = 0; array2D R(W, H); array2D G(W, H); array2D B(W, H); extract_channels(img, R, G, B, patchsize, 1e-1, multiThread); - - patchsize = max(max(W, H) / 600, 2); - npatches = get_dark_channel(R, G, B, dark, patchsize, nullptr, false, multiThread); - DEBUG_DUMP(dark); - max_t = estimate_ambient_light(R, G, B, dark, patchsize, npatches, ambient); + patchsize = max(max(W, H) / 600, 2); + array2D darkDownsized(W / patchsize + 1, H / patchsize + 1); + const int npatches = get_dark_channel_downsized(R, G, B, darkDownsized, patchsize, multiThread); + + max_t = estimate_ambient_light(R, G, B, darkDownsized, patchsize, npatches, ambient); if (options.rtSettings.verbose) { std::cout << "dehaze: ambient light is " @@ -251,78 +249,102 @@ void ImProcFunctions::dehaze(Imagefloat *img) << std::endl; } - get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread); - } - - if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { - if (options.rtSettings.verbose) { - std::cout << "dehaze: no haze detected" << std::endl; + if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { + if (options.rtSettings.verbose) { + std::cout << "dehaze: no haze detected" << std::endl; + } + img->normalizeFloatTo65535(); + return; // probably no haze at all } - img->normalizeFloatTo65535(); - return; // probably no haze at all - } - DEBUG_DUMP(t_tilde); - -#ifdef _OPENMP - #pragma omp parallel for if (multiThread) -#endif - for (int y = 0; y < H; ++y) { - for (int x = 0; x < W; ++x) { - dark[y][x] = 1.f - strength * dark[y][x]; - } + get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread, strength); } const int radius = patchsize * 4; - const float epsilon = 1e-5; - array2D &t = t_tilde; + constexpr float epsilon = 1e-5f; { array2D guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE); - guidedFilter(guideB, t_tilde, t, radius, epsilon, multiThread); + guidedFilter(guideB, dark, dark, radius, epsilon, multiThread); } - DEBUG_DUMP(t); - if (options.rtSettings.verbose) { std::cout << "dehaze: max distance is " << max_t << std::endl; } - float depth = -float(params->dehaze.depth) / 100.f; + const float depth = -float(params->dehaze.depth) / 100.f; const float t0 = max(1e-3f, std::exp(depth * max_t)); const float teps = 1e-3f; #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif for (int y = 0; y < H; ++y) { - for (int x = 0; x < W; ++x) { + int x = 0; +#ifdef __SSE2__ + const vfloat onev = F2V(1.f); + const vfloat ambient0v = F2V(ambient[0]); + const vfloat ambient1v = F2V(ambient[1]); + const vfloat ambient2v = F2V(ambient[2]); + const vfloat t0v = F2V(t0); + const vfloat tepsv = F2V(teps); + const vfloat c65535v = F2V(65535.f); + for (; x < W - 3; x += 4) { // ensure that the transmission is such that to avoid clipping... - float rgb[3] = { img->r(y, x), img->g(y, x), img->b(y, x) }; + vfloat r = LVFU(img->r(y, x)); + vfloat g = LVFU(img->g(y, x)); + vfloat b = LVFU(img->b(y, x)); // ... t >= tl to avoid negative values - float tl = 1.f - min(rgb[0]/ambient[0], rgb[1]/ambient[1], rgb[2]/ambient[2]); + const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v)); // ... t >= tu to avoid values > 1 - float tu = t0 - teps; - for (int c = 0; c < 3; ++c) { - if (ambient[c] < 1) { - tu = max(tu, (rgb[c] - ambient[c])/(1.f - ambient[c])); - } - } - float mt = max(t[y][x], t0, tl + teps, tu + teps); - if (params->dehaze.showDepthMap) { - img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt); - } else { - float r = (rgb[0] - ambient[0]) / mt + ambient[0]; - float g = (rgb[1] - ambient[1]) / mt + ambient[1]; - float b = (rgb[2] - ambient[2]) / mt + ambient[2]; + r -= ambient0v; + g -= ambient1v; + b -= ambient2v; - img->r(y, x) = r; - img->g(y, x) = g; - img->b(y, x) = b; + vfloat tuv = t0v - tepsv; + tuv = vself(vmaskf_lt(ambient0v, onev), vmaxf(tuv, r / (onev - ambient0v)), tuv); + tuv = vself(vmaskf_lt(ambient1v, onev), vmaxf(tuv, g / (onev - ambient1v)), tuv); + tuv = vself(vmaskf_lt(ambient2v, onev), vmaxf(tuv, b / (onev - ambient2v)), tuv); + + const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv, tuv) + tepsv); + if (params->dehaze.showDepthMap) { + const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * c65535v; + STVFU(img->r(y, x), valv); + STVFU(img->g(y, x), valv); + STVFU(img->b(y, x), valv); + } else { + STVFU(img->r(y, x), (r / mtv + ambient0v) * c65535v); + STVFU(img->g(y, x), (g / mtv + ambient1v) * c65535v); + STVFU(img->b(y, x), (b / mtv + ambient2v) * c65535v); + } + } +#endif + for (; x < W; ++x) { + // ensure that the transmission is such that to avoid clipping... + float r = img->r(y, x); + float g = img->g(y, x); + float b = img->b(y, x); + // ... t >= tl to avoid negative values + const float tl = 1.f - min(r / ambient[0], g / ambient[1], b / ambient[2]); + // ... t >= tu to avoid values > 1 + r -= ambient[0]; + g -= ambient[1]; + b -= ambient[2]; + + float tu = t0 - teps; + tu = ambient[0] < 1.f ? max(tu, r / (1.f - ambient[0])) : tu; + tu = ambient[1] < 1.f ? max(tu, g / (1.f - ambient[1])) : tu; + tu = ambient[2] < 1.f ? max(tu, b / (1.f - ambient[2])) : tu; + + const float mt = max(dark[y][x], tl + teps, tu + teps); + if (params->dehaze.showDepthMap) { + img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * 65535.f; + } else { + img->r(y, x) = (r / mt + ambient[0]) * 65535.f; + img->g(y, x) = (g / mt + ambient[1]) * 65535.f; + img->b(y, x) = (b / mt + ambient[2]) * 65535.f; } } } - - img->normalizeFloatTo65535(); } From 3981e285b9bda62d3126ad9704f58bf232ba67e0 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 19 Sep 2019 22:06:41 +0200 Subject: [PATCH 05/31] dehaze: fix broken build on gcc < 9.x, #5456 --- rtengine/ipdehaze.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 68af84970..cf333a5d5 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -162,7 +162,8 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c #ifdef _OPENMP #pragma omp parallel for schedule(dynamic) reduction(+:rr,gg,bb,n) #endif - for (const auto &p : patches) { + for (size_t i = 0; i < patches.size(); ++i) { + const auto &p = patches[i]; const int pW = min(p.first + patchsize, W); const int pH = min(p.second + patchsize, H); From 7d5ec6c0678f5a5d8a0eabccc197b51128949e3b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 19 Sep 2019 22:21:45 +0200 Subject: [PATCH 06/31] Fix bug at right border in new boxblur function, #5456 --- rtengine/boxblur.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index 3020278b2..5cc7430e2 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -26,6 +26,7 @@ #include "alignedbuffer.h" #include "rt_math.h" #include "opthelper.h" +#include "StopWatch.h" namespace rtengine @@ -485,41 +486,42 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu #endif { const int remaining = W % numCols; + if (remaining > 0) { float (* const rowBuffer)[8] = (float(*)[8]) buffer; const int col = W - remaining; float len = radius + 1; - for(int k = 0; k < remaining; k++) { + for(int k = 0; k < remaining; ++k) { rowBuffer[0][k] = dst[0][col + k]; } - for (int i = 1; i <= radius; i++) { - for(int k = 0; k < remaining; k++) { - dst[0][col + k] += dst[i][col + k]; + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { + dst[0][col + k] += dst[row][col + k]; } } - for(int k = 0; k < remaining; k++) { + for(int k = 0; k < remaining; ++k) { dst[0][col + k] /= len; } - for (int row = 1; row <= radius; row++) { - for(int k = 0; k < remaining; k++) { + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { rowBuffer[row][k] = dst[row][col + k]; - dst[row][col + k] = (dst[(row - 1)][col + k] * len + dst[row + radius][col + k]) / (len + 1); - len ++; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); } + len ++; } const float rlen = 1.f / len; - for (int row = radius + 1; row < H - radius; row++) { - for(int k = 0; k < remaining; k++) { + for (int row = radius + 1; row < H - radius; ++row) { + for(int k = 0; k < remaining; ++k) { rowBuffer[row][k] = dst[row][col + k]; - dst[row][col + k] = dst[(row - 1)][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) * rlen; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) * rlen; } } - for (int row = H - radius; row < H; row++) { - for(int k = 0; k < remaining; k++) { + for (int row = H - radius; row < H; ++row) { + for(int k = 0; k < remaining; ++k) { dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1); - len --; } + len --; } } } From 7ff3192cc96d7f9339775a3469e52715ddf3e08a Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 20 Sep 2019 14:03:09 +0200 Subject: [PATCH 07/31] dehaze: added lumimance mode from ART, #5456, thanks to @agriggio --- rtdata/languages/default | 2 ++ rtengine/color.h | 5 +++++ rtengine/ipdehaze.cc | 43 +++++++++++++++++++++++++++++++--------- rtengine/procparams.cc | 9 ++++++--- rtengine/procparams.h | 2 +- rtgui/dehaze.cc | 16 +++++++++++++++ rtgui/dehaze.h | 5 ++++- rtgui/paramsedited.cc | 6 ++++++ rtgui/paramsedited.h | 1 + 9 files changed, 75 insertions(+), 14 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index 3749a706a..49dabe9b9 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -744,6 +744,7 @@ HISTORY_MSG_COLORTONING_LABREGION_SHOWMASK;CT - region show mask HISTORY_MSG_COLORTONING_LABREGION_SLOPE;CT - region slope HISTORY_MSG_DEHAZE_DEPTH;Dehaze - Depth HISTORY_MSG_DEHAZE_ENABLED;Haze Removal +HISTORY_MSG_DEHAZE_LUMINANCE;Dehaze - Luminance only HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP;Dehaze - Show depth map HISTORY_MSG_DEHAZE_STRENGTH;Dehaze - Strength HISTORY_MSG_DUALDEMOSAIC_AUTO_CONTRAST;Dual demosaic - Auto threshold @@ -1538,6 +1539,7 @@ TP_DEFRINGE_RADIUS;Radius TP_DEFRINGE_THRESHOLD;Threshold TP_DEHAZE_DEPTH;Depth TP_DEHAZE_LABEL;Haze Removal +TP_DEHAZE_LUMINANCE;Luminance only TP_DEHAZE_SHOW_DEPTH_MAP;Show depth map TP_DEHAZE_STRENGTH;Strength TP_DIRPYRDENOISE_CHROMINANCE_AMZ;Auto multi-zones diff --git a/rtengine/color.h b/rtengine/color.h index b859fb0cf..1031ca150 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -210,6 +210,11 @@ public: return r * workingspace[1][0] + g * workingspace[1][1] + b * workingspace[1][2]; } + static vfloat rgbLuminance(vfloat r, vfloat g, vfloat b, const vfloat workingspace[3]) + { + return r * workingspace[0] + g * workingspace[1] + b * workingspace[2]; + } + /** * @brief Convert red/green/blue to L*a*b * @brief Convert red/green/blue to hue/saturation/luminance diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index cf333a5d5..4eb5ed8e9 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -276,6 +276,13 @@ BENCHFUN const float depth = -float(params->dehaze.depth) / 100.f; const float t0 = max(1e-3f, std::exp(depth * max_t)); const float teps = 1e-3f; + + const bool luminance = params->dehaze.luminance; + TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); +#ifdef __SSE2__ + const vfloat wsv[3] = {F2V(ws[1][0]), F2V(ws[1][1]),F2V(ws[1][2])}; +#endif + const float ambientY = Color::rgbLuminance(ambient[0], ambient[1], ambient[2], ws); #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -286,6 +293,8 @@ BENCHFUN const vfloat ambient0v = F2V(ambient[0]); const vfloat ambient1v = F2V(ambient[1]); const vfloat ambient2v = F2V(ambient[2]); + const vfloat ambientYv = F2V(ambientY); + const vfloat epsYv = F2V(1e-5f); const vfloat t0v = F2V(t0); const vfloat tepsv = F2V(teps); const vfloat c65535v = F2V(65535.f); @@ -297,14 +306,14 @@ BENCHFUN // ... t >= tl to avoid negative values const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v)); // ... t >= tu to avoid values > 1 - r -= ambient0v; - g -= ambient1v; - b -= ambient2v; +// r -= ambient0v; +// g -= ambient1v; +// b -= ambient2v; vfloat tuv = t0v - tepsv; - tuv = vself(vmaskf_lt(ambient0v, onev), vmaxf(tuv, r / (onev - ambient0v)), tuv); - tuv = vself(vmaskf_lt(ambient1v, onev), vmaxf(tuv, g / (onev - ambient1v)), tuv); - tuv = vself(vmaskf_lt(ambient2v, onev), vmaxf(tuv, b / (onev - ambient2v)), tuv); + tuv = vself(vmaskf_lt(ambient0v, onev), vmaxf(tuv, (r - ambient0v) / (onev - ambient0v)), tuv); + tuv = vself(vmaskf_lt(ambient1v, onev), vmaxf(tuv, (g - ambient1v) / (onev - ambient1v)), tuv); + tuv = vself(vmaskf_lt(ambient2v, onev), vmaxf(tuv, (b - ambient2v) / (onev - ambient2v)), tuv); const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv, tuv) + tepsv); if (params->dehaze.showDepthMap) { @@ -312,10 +321,17 @@ BENCHFUN STVFU(img->r(y, x), valv); STVFU(img->g(y, x), valv); STVFU(img->b(y, x), valv); + } else if (luminance) { + const vfloat Yv = Color::rgbLuminance(r, g, b, wsv); + const vfloat YYv = (Yv - ambientYv) / mtv + ambientYv; + const vfloat fv = vself(vmaskf_gt(Yv, epsYv), c65535v * YYv / Yv, c65535v); + STVFU(img->r(y, x), r * fv); + STVFU(img->g(y, x), g * fv); + STVFU(img->b(y, x), b * fv); } else { - STVFU(img->r(y, x), (r / mtv + ambient0v) * c65535v); - STVFU(img->g(y, x), (g / mtv + ambient1v) * c65535v); - STVFU(img->b(y, x), (b / mtv + ambient2v) * c65535v); + STVFU(img->r(y, x), ((r - ambient0v) / mtv + ambient0v) * c65535v); + STVFU(img->g(y, x), ((g - ambient1v) / mtv + ambient1v) * c65535v); + STVFU(img->b(y, x), ((b - ambient2v) / mtv + ambient2v) * c65535v); } } #endif @@ -339,6 +355,15 @@ BENCHFUN const float mt = max(dark[y][x], tl + teps, tu + teps); if (params->dehaze.showDepthMap) { img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * 65535.f; + } else if (luminance) { + const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws); + const float YY = (Y - ambientY) / mt + ambientY; + if (Y > 1e-5f) { + const float f = 65535.f * YY / Y; + img->r(y, x) *= f; + img->g(y, x) *= f; + img->b(y, x) *= f; + } } else { img->r(y, x) = (r / mt + ambient[0]) * 65535.f; img->g(y, x) = (g / mt + ambient[1]) * 65535.f; diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index f88220c4e..fcf6dd4db 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -2517,7 +2517,8 @@ DehazeParams::DehazeParams() : enabled(false), strength(50), showDepthMap(false), - depth(25) + depth(25), + luminance(false) { } @@ -2527,7 +2528,8 @@ bool DehazeParams::operator ==(const DehazeParams& other) const enabled == other.enabled && strength == other.strength && showDepthMap == other.showDepthMap - && depth == other.depth; + && depth == other.depth + && luminance == other.luminance; } bool DehazeParams::operator !=(const DehazeParams& other) const @@ -3238,7 +3240,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->dehaze.strength, "Dehaze", "Strength", dehaze.strength, keyFile); saveToKeyfile(!pedited || pedited->dehaze.showDepthMap, "Dehaze", "ShowDepthMap", dehaze.showDepthMap, keyFile); saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Depth", dehaze.depth, keyFile); - + saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Luminance", dehaze.luminance, keyFile); // Directional pyramid denoising saveToKeyfile(!pedited || pedited->dirpyrDenoise.enabled, "Directional Pyramid Denoising", "Enabled", dirpyrDenoise.enabled, keyFile); saveToKeyfile(!pedited || pedited->dirpyrDenoise.enhance, "Directional Pyramid Denoising", "Enhance", dirpyrDenoise.enhance, keyFile); @@ -4878,6 +4880,7 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited) assignFromKeyfile(keyFile, "Dehaze", "Strength", pedited, dehaze.strength, pedited->dehaze.strength); assignFromKeyfile(keyFile, "Dehaze", "ShowDepthMap", pedited, dehaze.showDepthMap, pedited->dehaze.showDepthMap); assignFromKeyfile(keyFile, "Dehaze", "Depth", pedited, dehaze.depth, pedited->dehaze.depth); + assignFromKeyfile(keyFile, "Dehaze", "Luminance", pedited, dehaze.luminance, pedited->dehaze.luminance); } if (keyFile.has_group("Film Simulation")) { diff --git a/rtengine/procparams.h b/rtengine/procparams.h index ce03efc7d..a60c497bc 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -1342,7 +1342,7 @@ struct DehazeParams { int strength; bool showDepthMap; int depth; - + bool luminance; DehazeParams(); bool operator==(const DehazeParams &other) const; diff --git a/rtgui/dehaze.cc b/rtgui/dehaze.cc index 6f60d08d6..6b7fcd64f 100644 --- a/rtgui/dehaze.cc +++ b/rtgui/dehaze.cc @@ -36,6 +36,7 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false, EvDehazeStrength = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_STRENGTH"); EvDehazeShowDepthMap = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP"); EvDehazeDepth = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_DEPTH"); + EvDehazeLuminance = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_LUMINANCE"); strength = Gtk::manage(new Adjuster(M("TP_DEHAZE_STRENGTH"), 0., 100., 1., 50.)); strength->setAdjusterListener(this); @@ -45,12 +46,17 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false, depth->setAdjusterListener(this); depth->show(); + luminance = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_LUMINANCE"))); + luminance->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::luminanceChanged)); + luminance->show(); + showDepthMap = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_SHOW_DEPTH_MAP"))); showDepthMap->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::showDepthMapChanged)); showDepthMap->show(); pack_start(*strength); pack_start(*depth); + pack_start(*luminance); pack_start(*showDepthMap); } @@ -64,12 +70,14 @@ void Dehaze::read(const ProcParams *pp, const ParamsEdited *pedited) depth->setEditedState(pedited->dehaze.depth ? Edited : UnEdited); set_inconsistent(multiImage && !pedited->dehaze.enabled); showDepthMap->set_inconsistent(!pedited->dehaze.showDepthMap); + luminance->set_inconsistent(!pedited->dehaze.luminance); } setEnabled(pp->dehaze.enabled); strength->setValue(pp->dehaze.strength); depth->setValue(pp->dehaze.depth); showDepthMap->set_active(pp->dehaze.showDepthMap); + luminance->set_active(pp->dehaze.luminance); enableListener(); } @@ -81,12 +89,14 @@ void Dehaze::write(ProcParams *pp, ParamsEdited *pedited) pp->dehaze.depth = depth->getValue(); pp->dehaze.enabled = getEnabled(); pp->dehaze.showDepthMap = showDepthMap->get_active(); + pp->dehaze.luminance = luminance->get_active(); if (pedited) { pedited->dehaze.strength = strength->getEditedState(); pedited->dehaze.depth = depth->getEditedState(); pedited->dehaze.enabled = !get_inconsistent(); pedited->dehaze.showDepthMap = !showDepthMap->get_inconsistent(); + pedited->dehaze.luminance = !luminance->get_inconsistent(); } } @@ -138,6 +148,12 @@ void Dehaze::showDepthMapChanged() } } +void Dehaze::luminanceChanged() +{ + if (listener) { + listener->panelChanged(EvDehazeLuminance, luminance->get_active() ? M("GENERAL_ENABLED") : M("GENERAL_DISABLED")); + } +} void Dehaze::setBatchMode(bool batchMode) { diff --git a/rtgui/dehaze.h b/rtgui/dehaze.h index 3120dfc91..6a9d31cd1 100644 --- a/rtgui/dehaze.h +++ b/rtgui/dehaze.h @@ -28,12 +28,14 @@ class Dehaze: public ToolParamBlock, public AdjusterListener, public FoldableToo private: Adjuster *strength; Adjuster *depth; - Gtk::CheckButton *showDepthMap; + Gtk::CheckButton *showDepthMap; + Gtk::CheckButton *luminance; rtengine::ProcEvent EvDehazeEnabled; rtengine::ProcEvent EvDehazeStrength; rtengine::ProcEvent EvDehazeDepth; rtengine::ProcEvent EvDehazeShowDepthMap; + rtengine::ProcEvent EvDehazeLuminance; public: @@ -47,6 +49,7 @@ public: void adjusterChanged(Adjuster *a, double newval) override; void enabledChanged() override; void showDepthMapChanged(); + void luminanceChanged(); void setAdjusterBehavior(bool strengthAdd); }; diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index 2ab5702ea..9ea89f267 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -587,6 +587,7 @@ void ParamsEdited::set(bool v) dehaze.strength = v; dehaze.showDepthMap = v; dehaze.depth = v; + dehaze.luminance = v; metadata.mode = v; filmNegative.enabled = v; filmNegative.redRatio = v; @@ -1158,6 +1159,7 @@ void ParamsEdited::initFrom(const std::vector& dehaze.strength = dehaze.strength && p.dehaze.strength == other.dehaze.strength; dehaze.showDepthMap = dehaze.showDepthMap && p.dehaze.showDepthMap == other.dehaze.showDepthMap; dehaze.depth = dehaze.depth && p.dehaze.depth == other.dehaze.depth; + dehaze.luminance = dehaze.luminance && p.dehaze.luminance == other.dehaze.luminance; metadata.mode = metadata.mode && p.metadata.mode == other.metadata.mode; filmNegative.enabled = filmNegative.enabled && p.filmNegative.enabled == other.filmNegative.enabled; filmNegative.redRatio = filmNegative.redRatio && p.filmNegative.redRatio == other.filmNegative.redRatio; @@ -3224,6 +3226,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng toEdit.dehaze.showDepthMap = mods.dehaze.showDepthMap; } + if (dehaze.luminance) { + toEdit.dehaze.luminance = mods.dehaze.luminance; + } + if (metadata.mode) { toEdit.metadata.mode = mods.metadata.mode; } diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h index 1bd7170d4..41af0510d 100644 --- a/rtgui/paramsedited.h +++ b/rtgui/paramsedited.h @@ -596,6 +596,7 @@ struct DehazeParamsEdited { bool strength; bool showDepthMap; bool depth; + bool luminance; }; struct RAWParamsEdited { From 83a8ca8ef52a03b600610a40720149bee30c5bec Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 20 Sep 2019 15:29:35 +0200 Subject: [PATCH 08/31] dehaze: Fix artifacts when blue channel is clipped, #5456, thanks to @agriggio --- rtengine/ipdehaze.cc | 50 +++++++++++++------------------------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 4eb5ed8e9..81074a15b 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -264,10 +264,8 @@ BENCHFUN const int radius = patchsize * 4; constexpr float epsilon = 1e-5f; - { - array2D guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE); - guidedFilter(guideB, dark, dark, radius, epsilon, multiThread); - } + array2D guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE); + guidedFilter(guideB, dark, dark, radius, epsilon, multiThread); if (options.rtSettings.verbose) { std::cout << "dehaze: max distance is " << max_t << std::endl; @@ -300,22 +298,12 @@ BENCHFUN const vfloat c65535v = F2V(65535.f); for (; x < W - 3; x += 4) { // ensure that the transmission is such that to avoid clipping... - vfloat r = LVFU(img->r(y, x)); - vfloat g = LVFU(img->g(y, x)); - vfloat b = LVFU(img->b(y, x)); + const vfloat r = LVFU(img->r(y, x)); + const vfloat g = LVFU(img->g(y, x)); + const vfloat b = LVFU(img->b(y, x)); // ... t >= tl to avoid negative values const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v)); - // ... t >= tu to avoid values > 1 -// r -= ambient0v; -// g -= ambient1v; -// b -= ambient2v; - - vfloat tuv = t0v - tepsv; - tuv = vself(vmaskf_lt(ambient0v, onev), vmaxf(tuv, (r - ambient0v) / (onev - ambient0v)), tuv); - tuv = vself(vmaskf_lt(ambient1v, onev), vmaxf(tuv, (g - ambient1v) / (onev - ambient1v)), tuv); - tuv = vself(vmaskf_lt(ambient2v, onev), vmaxf(tuv, (b - ambient2v) / (onev - ambient2v)), tuv); - - const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv, tuv) + tepsv); + const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv + tepsv, t0v)); if (params->dehaze.showDepthMap) { const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * c65535v; STVFU(img->r(y, x), valv); @@ -337,37 +325,27 @@ BENCHFUN #endif for (; x < W; ++x) { // ensure that the transmission is such that to avoid clipping... - float r = img->r(y, x); - float g = img->g(y, x); - float b = img->b(y, x); + const float r = img->r(y, x); + const float g = img->g(y, x); + const float b = img->b(y, x); // ... t >= tl to avoid negative values const float tl = 1.f - min(r / ambient[0], g / ambient[1], b / ambient[2]); - // ... t >= tu to avoid values > 1 - r -= ambient[0]; - g -= ambient[1]; - b -= ambient[2]; - - float tu = t0 - teps; - tu = ambient[0] < 1.f ? max(tu, r / (1.f - ambient[0])) : tu; - tu = ambient[1] < 1.f ? max(tu, g / (1.f - ambient[1])) : tu; - tu = ambient[2] < 1.f ? max(tu, b / (1.f - ambient[2])) : tu; - - const float mt = max(dark[y][x], tl + teps, tu + teps); + const float mt = max(dark[y][x], t0, tl + teps); if (params->dehaze.showDepthMap) { img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * 65535.f; } else if (luminance) { const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws); - const float YY = (Y - ambientY) / mt + ambientY; if (Y > 1e-5f) { + const float YY = (Y - ambientY) / mt + ambientY; const float f = 65535.f * YY / Y; img->r(y, x) *= f; img->g(y, x) *= f; img->b(y, x) *= f; } } else { - img->r(y, x) = (r / mt + ambient[0]) * 65535.f; - img->g(y, x) = (g / mt + ambient[1]) * 65535.f; - img->b(y, x) = (b / mt + ambient[2]) * 65535.f; + img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * 65535.f; + img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * 65535.f; + img->b(y, x) = ((b - ambient[2]) / mt + ambient[2]) * 65535.f; } } } From a7cc59c91dc2b715203e63bfcf8f5423103a18e6 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 21 Sep 2019 21:33:05 +0200 Subject: [PATCH 09/31] dehaze: further speedup, stolen from ART, thanks @agriggio, #5456 --- rtengine/ipdehaze.cc | 91 ++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 81074a15b..6f516d95b 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -39,7 +39,7 @@ #include "rt_math.h" #define BENCHMARK #include "StopWatch.h" - +#include "rescale.h" extern Options options; namespace rtengine { @@ -83,24 +83,24 @@ int get_dark_channel_downsized(const array2D &R, const array2D &G, #pragma omp parallel for if (multithread) #endif for (int y = 0; y < H; y += patchsize) { - int yy = y / patchsize; const int pH = min(y + patchsize, H); - for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { + for (int x = 0; x < W; x += patchsize) { float val = RT_INFINITY_F; const int pW = min(x + patchsize, W); - for (int xp = x; xp < pW; ++xp) { - for (int yp = y; yp < pH; ++yp) { - val = min(val, R[yp][xp], G[yp][xp], B[yp][xp]); + for (int xx = x; xx < pW; ++xx) { + for (int yy = y; yy < pH; ++yy) { + val = min(val, R[yy][xx], G[yy][xx], B[yy][xx]); } } - dst[yy][xx] = val; + for (int yy = y; yy < pH; ++yy) { + std::fill(dst[yy] + x, dst[yy] + pW, val); + } } } return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); } - float estimate_ambient_light(const array2D &R, const array2D &G, const array2D &B, const array2D &dark, int patchsize, int npatches, float ambient[3]) { const int W = R.width(); @@ -109,10 +109,10 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c float darklim = RT_INFINITY_F; { std::vector p; - for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) { - for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { - if (!OOG(dark[yy][xx], 1.f - 1e-5f)) { - p.push_back(dark[yy][xx]); + for (int y = 0; y < H; y += patchsize) { + for (int x = 0; x < W; x += patchsize) { + if (!OOG(dark[y][x], 1.f - 1e-5f)) { + p.push_back(dark[y][x]); } } } @@ -124,9 +124,9 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c std::vector> patches; patches.reserve(npatches); - for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) { - for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) { - if (dark[yy][xx] >= darklim && !OOG(dark[yy][xx], 1.f)) { + for (int y = 0; y < H; y += patchsize) { + for (int x = 0; x < W; x += patchsize) { + if (dark[y][x] >= darklim && !OOG(dark[y][x], 1.f)) { patches.push_back(std::make_pair(x, y)); } } @@ -142,9 +142,9 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c std::vector l; l.reserve(patches.size() * patchsize * patchsize); - for (const auto &p : patches) { - const int pW = min(p.first + patchsize, W); - const int pH = min(p.second + patchsize, H); + for (auto &p : patches) { + const int pW = min(p.first+patchsize, W); + const int pH = min(p.second+patchsize, H); for (int y = p.second; y < pH; ++y) { for (int x = p.first; x < pW; ++x) { @@ -159,19 +159,15 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c double rr = 0, gg = 0, bb = 0; int n = 0; -#ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) reduction(+:rr,gg,bb,n) -#endif - for (size_t i = 0; i < patches.size(); ++i) { - const auto &p = patches[i]; - const int pW = min(p.first + patchsize, W); - const int pH = min(p.second + patchsize, H); + for (auto &p : patches) { + const int pW = min(p.first+patchsize, W); + const int pH = min(p.second+patchsize, H); for (int y = p.second; y < pH; ++y) { for (int x = p.first; x < pW; ++x) { - const float r = R[y][x]; - const float g = G[y][x]; - const float b = B[y][x]; + float r = R[y][x]; + float g = G[y][x]; + float b = B[y][x]; if (r + g + b >= bright_lim) { rr += r; gg += g; @@ -181,7 +177,6 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c } } } - n = std::max(n, 1); ambient[0] = rr / n; ambient[1] = gg / n; @@ -191,7 +186,6 @@ float estimate_ambient_light(const array2D &R, const array2D &G, c return darklim > 0 ? -1.125f * std::log(darklim) : std::log(std::numeric_limits::max()) / 2; } - void extract_channels(Imagefloat *img, array2D &r, array2D &g, array2D &b, int radius, float epsilon, bool multithread) { const int W = img->getWidth(); @@ -238,11 +232,32 @@ BENCHFUN array2D B(W, H); extract_channels(img, R, G, B, patchsize, 1e-1, multiThread); - patchsize = max(max(W, H) / 600, 2); - array2D darkDownsized(W / patchsize + 1, H / patchsize + 1); - const int npatches = get_dark_channel_downsized(R, G, B, darkDownsized, patchsize, multiThread); + { + constexpr int sizecap = 200; + float r = float(W)/float(H); + const int hh = r >= 1.f ? sizecap : sizecap / r; + const int ww = r >= 1.f ? sizecap * r : sizecap; - max_t = estimate_ambient_light(R, G, B, darkDownsized, patchsize, npatches, ambient); + if (W <= ww && H <= hh) { + // don't rescale small thumbs + array2D D(W, H); + int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread); + max_t = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient); + } else { + array2D RR(ww, hh); + array2D GG(ww, hh); + array2D BB(ww, hh); + rescaleNearest(R, RR, multiThread); + rescaleNearest(G, GG, multiThread); + rescaleNearest(B, BB, multiThread); + array2D D(ww, hh); + + int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread); + max_t = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient); + } + } + + patchsize = max(max(W, H) / 600, 2); if (options.rtSettings.verbose) { std::cout << "dehaze: ambient light is " @@ -250,14 +265,6 @@ BENCHFUN << std::endl; } - if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { - if (options.rtSettings.verbose) { - std::cout << "dehaze: no haze detected" << std::endl; - } - img->normalizeFloatTo65535(); - return; // probably no haze at all - } - get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread, strength); } From cab84aed38456964f34f1bbe68b4ca3104753cd7 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 22 Sep 2019 13:21:34 +0200 Subject: [PATCH 10/31] dehaze: fix bug in luminance mode, #5456 --- rtengine/ipdehaze.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 6f516d95b..125fa4d60 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -283,7 +283,7 @@ BENCHFUN const float teps = 1e-3f; const bool luminance = params->dehaze.luminance; - TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); #ifdef __SSE2__ const vfloat wsv[3] = {F2V(ws[1][0]), F2V(ws[1][1]),F2V(ws[1][2])}; #endif @@ -342,13 +342,11 @@ BENCHFUN img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * 65535.f; } else if (luminance) { const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws); - if (Y > 1e-5f) { - const float YY = (Y - ambientY) / mt + ambientY; - const float f = 65535.f * YY / Y; - img->r(y, x) *= f; - img->g(y, x) *= f; - img->b(y, x) *= f; - } + const float YY = (Y - ambientY) / mt + ambientY; + const float f = Y > 1e-5f ? 65535.f * YY / Y : 65535.f; + img->r(y, x) *= f; + img->g(y, x) *= f; + img->b(y, x) *= f; } else { img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * 65535.f; img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * 65535.f; From 189f474e033c908ed06bfcb32580ab63427b9ad0 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 22 Sep 2019 20:53:03 +0200 Subject: [PATCH 11/31] dehaze: add accidently removed early exit in case there is no haze detected, #5456 --- rtengine/ipdehaze.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 125fa4d60..00236d1de 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -257,6 +257,13 @@ BENCHFUN } } + if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) { + if (options.rtSettings.verbose) { + std::cout << "dehaze: no haze detected" << std::endl; + } + img->normalizeFloatTo65535(); + return; // probably no haze at all + } patchsize = max(max(W, H) / 600, 2); if (options.rtSettings.verbose) { From ca162e8ffc7ecd1ad924a0735c9e364c0aff3779 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 22 Sep 2019 23:45:19 +0200 Subject: [PATCH 12/31] dehaze: (experimental) input normalization to improve handling of overexposed pictures, thanks @agriggio, #5456 --- rtengine/ipdehaze.cc | 79 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 00236d1de..8ba1b935a 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -28,24 +28,69 @@ * */ +#include #include -#include +#include #include "guidedfilter.h" #include "improcfun.h" #include "procparams.h" -#include "rt_algo.h" -#include "rt_algo.h" +#include "rescale.h" #include "rt_math.h" #define BENCHMARK #include "StopWatch.h" -#include "rescale.h" + extern Options options; namespace rtengine { namespace { +float normalize(Imagefloat *rgb, bool multithread) +{ + float maxval = 0.f; + const int W = rgb->getWidth(); + const int H = rgb->getHeight(); +#ifdef _OPENMP +# pragma omp parallel for reduction(max:maxval) if (multithread) +#endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + maxval = max(maxval, rgb->r(y, x), rgb->g(y, x), rgb->b(y, x)); + } + } + maxval = max(maxval * 2.f, 65535.f); +#ifdef _OPENMP +# pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + rgb->r(y, x) /= maxval; + rgb->g(y, x) /= maxval; + rgb->b(y, x) /= maxval; + } + } + return maxval; +} + +void restore(Imagefloat *rgb, float maxval, bool multithread) +{ + const int W = rgb->getWidth(); + const int H = rgb->getHeight(); + if (maxval > 0.f && maxval != 1.f) { +#ifdef _OPENMP +# pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < H; ++y) { + for (int x = 0; x < W; ++x) { + rgb->r(y, x) *= maxval; + rgb->g(y, x) *= maxval; + rgb->b(y, x) *= maxval; + } + } + } +} + int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength) { const int W = R.width(); @@ -210,7 +255,7 @@ void ImProcFunctions::dehaze(Imagefloat *img) return; } BENCHFUN - img->normalizeFloatTo1(); + const float maxChannel = normalize(img, multiThread); const int W = img->getWidth(); const int H = img->getHeight(); @@ -261,7 +306,7 @@ BENCHFUN if (options.rtSettings.verbose) { std::cout << "dehaze: no haze detected" << std::endl; } - img->normalizeFloatTo65535(); + restore(img, maxChannel, multiThread); return; // probably no haze at all } patchsize = max(max(W, H) / 600, 2); @@ -309,7 +354,7 @@ BENCHFUN const vfloat epsYv = F2V(1e-5f); const vfloat t0v = F2V(t0); const vfloat tepsv = F2V(teps); - const vfloat c65535v = F2V(65535.f); + const vfloat cmaxChannelv = F2V(maxChannel); for (; x < W - 3; x += 4) { // ensure that the transmission is such that to avoid clipping... const vfloat r = LVFU(img->r(y, x)); @@ -319,21 +364,21 @@ BENCHFUN const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v)); const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv + tepsv, t0v)); if (params->dehaze.showDepthMap) { - const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * c65535v; + const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * cmaxChannelv; STVFU(img->r(y, x), valv); STVFU(img->g(y, x), valv); STVFU(img->b(y, x), valv); } else if (luminance) { const vfloat Yv = Color::rgbLuminance(r, g, b, wsv); const vfloat YYv = (Yv - ambientYv) / mtv + ambientYv; - const vfloat fv = vself(vmaskf_gt(Yv, epsYv), c65535v * YYv / Yv, c65535v); + const vfloat fv = vself(vmaskf_gt(Yv, epsYv), cmaxChannelv * YYv / Yv, cmaxChannelv); STVFU(img->r(y, x), r * fv); STVFU(img->g(y, x), g * fv); STVFU(img->b(y, x), b * fv); } else { - STVFU(img->r(y, x), ((r - ambient0v) / mtv + ambient0v) * c65535v); - STVFU(img->g(y, x), ((g - ambient1v) / mtv + ambient1v) * c65535v); - STVFU(img->b(y, x), ((b - ambient2v) / mtv + ambient2v) * c65535v); + STVFU(img->r(y, x), ((r - ambient0v) / mtv + ambient0v) * cmaxChannelv); + STVFU(img->g(y, x), ((g - ambient1v) / mtv + ambient1v) * cmaxChannelv); + STVFU(img->b(y, x), ((b - ambient2v) / mtv + ambient2v) * cmaxChannelv); } } #endif @@ -346,18 +391,18 @@ BENCHFUN const float tl = 1.f - min(r / ambient[0], g / ambient[1], b / ambient[2]); const float mt = max(dark[y][x], t0, tl + teps); if (params->dehaze.showDepthMap) { - img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * 65535.f; + img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * maxChannel; } else if (luminance) { const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws); const float YY = (Y - ambientY) / mt + ambientY; - const float f = Y > 1e-5f ? 65535.f * YY / Y : 65535.f; + const float f = Y > 1e-5f ? maxChannel * YY / Y : maxChannel; img->r(y, x) *= f; img->g(y, x) *= f; img->b(y, x) *= f; } else { - img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * 65535.f; - img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * 65535.f; - img->b(y, x) = ((b - ambient[2]) / mt + ambient[2]) * 65535.f; + img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * maxChannel; + img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * maxChannel; + img->b(y, x) = ((b - ambient[2]) / mt + ambient[2]) * maxChannel; } } } From f03605b73526235c27832f5815359943f4e0441e Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:39:21 +0200 Subject: [PATCH 13/31] boxblur: apply changes requested by @Floessie in code review --- rtengine/boxblur.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index 5cc7430e2..27aa9d2fc 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -20,6 +20,7 @@ #define _BOXBLUR_H_ #include +#include #include #include #include @@ -337,9 +338,10 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu #pragma omp parallel if (multiThread) #endif { - float* const buffer = new float[std::max(W, 8 * H)]; + std::unique_ptr buffer(new float[std::max(W, 8 * H)]); + //horizontal blur - float* const lineBuffer = buffer; + float* const lineBuffer = buffer.get(); #ifdef _OPENMP #pragma omp for #endif @@ -356,8 +358,9 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu for (int col = 1; col <= radius; col++) { lineBuffer[col] = src[row][col]; - dst[row][col] = tempval = (tempval * len + src[row][col + radius]) / (len + 1); - len ++; + tempval = (tempval * len + src[row][col + radius]) / (len + 1); + dst[row][col] = tempval; + ++len; } for (int col = radius + 1; col < W - radius; col++) { @@ -367,15 +370,15 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu for (int col = W - radius; col < W; col++) { dst[row][col] = tempval = (tempval * len - lineBuffer[col - radius - 1]) / (len - 1); - len --; + --len; } } //vertical blur #ifdef __SSE2__ - vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer; - vfloat leninitv = F2V(radius + 1); - vfloat onev = F2V(1.f); + vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get(); + const vfloat leninitv = F2V(radius + 1); + const vfloat onev = F2V(1.f); vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; #ifdef _OPENMP @@ -432,7 +435,7 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu } #else - float (* const rowBuffer)[8] = (float(*)[8]) buffer; + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); #ifdef _OPENMP #pragma omp for nowait #endif @@ -440,12 +443,12 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu for (int col = 0; col < W - numCols + 1; col += 8) { float len = radius + 1; - for(int k = 0; k < numCols; k++) { + for (int k = 0; k < numCols; k++) { rowBuffer[0][k] = dst[0][col + k]; } for (int i = 1; i <= radius; i++) { - for(int k = 0; k < numCols; k++) { + for (int k = 0; k < numCols; k++) { dst[0][col + k] += dst[i][col + k]; } } @@ -488,7 +491,7 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu const int remaining = W % numCols; if (remaining > 0) { - float (* const rowBuffer)[8] = (float(*)[8]) buffer; + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); const int col = W - remaining; float len = radius + 1; @@ -525,7 +528,6 @@ inline void boxblur (float** src, float** dst, int radius, int W, int H, bool mu } } } - delete [] buffer; } } From 3ca7f09655e0c703a7d23c74414723d8f0d7b8a4 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:39:50 +0200 Subject: [PATCH 14/31] guidedfilter: apply changes requested by @Floessie in code review --- rtengine/guidedfilter.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index 8d19fc7a5..159e89504 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -66,7 +66,7 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 enum Op {MUL, DIVEPSILON, SUBMUL}; const auto apply = - [=](Op op, array2D &res, const array2D &a, const array2D &b, const array2D &c=array2D()) -> void + [multithread, epsilon](Op op, array2D &res, const array2D &a, const array2D &b, const array2D &c=array2D()) -> void { const int w = res.width(); const int h = res.height(); @@ -96,13 +96,13 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 }; const auto f_subsample = - [=](array2D &d, const array2D &s) -> void + [multithread](array2D &d, const array2D &s) -> void { rescaleBilinear(s, d, multithread); }; const auto f_mean = - [&](array2D &d, array2D &s, int rad) -> void + [multithread](array2D &d, array2D &s, int rad) -> void { rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1); boxblur(s, d, rad, s.width(), s.height(), multithread); From 7a8225d2745ef86eb71b537a2a852cbdf9882f26 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:40:19 +0200 Subject: [PATCH 15/31] procparams: apply changes requested by @Floessie in code review --- rtengine/procparams.cc | 1 + rtengine/procparams.h | 1 + 2 files changed, 2 insertions(+) diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index 734796cd8..8de7e2b55 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -3243,6 +3243,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->dehaze.showDepthMap, "Dehaze", "ShowDepthMap", dehaze.showDepthMap, keyFile); saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Depth", dehaze.depth, keyFile); saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Luminance", dehaze.luminance, keyFile); + // Directional pyramid denoising saveToKeyfile(!pedited || pedited->dirpyrDenoise.enabled, "Directional Pyramid Denoising", "Enabled", dirpyrDenoise.enabled, keyFile); saveToKeyfile(!pedited || pedited->dirpyrDenoise.enhance, "Directional Pyramid Denoising", "Enhance", dirpyrDenoise.enhance, keyFile); diff --git a/rtengine/procparams.h b/rtengine/procparams.h index 0ef0f045b..734ca7556 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -1344,6 +1344,7 @@ struct DehazeParams { bool showDepthMap; int depth; bool luminance; + DehazeParams(); bool operator==(const DehazeParams &other) const; From 204475dd05576f9e2d4b8b8d8e443603f1f3d9aa Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:42:23 +0200 Subject: [PATCH 16/31] sleefsseavx: added horizontal min and max --- rtengine/sleefsseavx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 3000c1c10..cce88df5d 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1390,6 +1390,18 @@ static inline float vhadd( vfloat a ) { return _mm_cvtss_f32(_mm_add_ss(a, _mm_shuffle_ps(a, a, 1))); } +static inline float vhmin(vfloat a) { + // returns min(a[0], a[1], a[2], a[3]) + a = vminf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vminf(a, _mm_shuffle_ps(a, a, 1))); +} + +static inline float vhmax(vfloat a) { + // returns max(a[0], a[1], a[2], a[3]) + a = vmaxf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vmaxf(a, _mm_shuffle_ps(a, a, 1))); +} + static INLINE vfloat vmul2f(vfloat a){ // fastest way to multiply by 2 return a + a; From 9cff2bca486e36b8bedc2612d26587d938df5c6a Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:43:43 +0200 Subject: [PATCH 17/31] dehaze: speedup and changes requested by @Floessie in code review, #5456 --- rtengine/ipdehaze.cc | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index 8ba1b935a..fb4f73903 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . - */ +*/ /* * Haze removal using the algorithm described in the paper: @@ -26,7 +26,7 @@ * * using a guided filter for the "soft matting" of the transmission map * - */ +*/ #include #include @@ -52,7 +52,7 @@ float normalize(Imagefloat *rgb, bool multithread) const int W = rgb->getWidth(); const int H = rgb->getHeight(); #ifdef _OPENMP -# pragma omp parallel for reduction(max:maxval) if (multithread) + #pragma omp parallel for reduction(max:maxval) schedule(dynamic, 16) if (multithread) #endif for (int y = 0; y < H; ++y) { for (int x = 0; x < W; ++x) { @@ -61,7 +61,7 @@ float normalize(Imagefloat *rgb, bool multithread) } maxval = max(maxval * 2.f, 65535.f); #ifdef _OPENMP -# pragma omp parallel for if (multithread) + #pragma omp parallel for schedule(dynamic, 16) if (multithread) #endif for (int y = 0; y < H; ++y) { for (int x = 0; x < W; ++x) { @@ -102,13 +102,36 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr for (int y = 0; y < H; y += patchsize) { const int pH = min(y + patchsize, H); for (int x = 0; x < W; x += patchsize) { - float val = RT_INFINITY_F; + float minR = RT_INFINITY_F; + float minG = RT_INFINITY_F; + float minB = RT_INFINITY_F; +#ifdef __SSE2__ + vfloat minRv = F2V(minR); + vfloat minGv = F2V(minG); + vfloat minBv = F2V(minB); +#endif const int pW = min(x + patchsize, W); - for (int xx = x; xx < pW; ++xx) { - for (int yy = y; yy < pH; ++yy) { - val = min(val, R[yy][xx] / ambient[0], G[yy][xx] / ambient[1], B[yy][xx] / ambient[2]); + for (int yy = y; yy < pH; ++yy) { + int xx = x; +#ifdef __SSE2__ + for (; xx < pW - 3; xx += 4) { + minRv = vminf(minRv, LVFU(R[yy][xx])); + minGv = vminf(minGv, LVFU(G[yy][xx])); + minBv = vminf(minBv, LVFU(B[yy][xx])); + } +#endif + for (; xx < pW; ++xx) { + minR = min(minR, R[yy][xx]); + minG = min(minG, G[yy][xx]); + minB = min(minB, B[yy][xx]); } } +#ifdef __SSE2__ + minR = min(minR, vhmin(minRv)); + minG = min(minG, vhmin(minGv)); + minB = min(minB, vhmin(minBv)); +#endif + float val = min(minR / ambient[0], minG / ambient[1], minB / ambient[2]); val = 1.f - strength * LIM01(val); for (int yy = y; yy < pH; ++yy) { std::fill(dst[yy] + x, dst[yy] + pW, val); @@ -269,25 +292,25 @@ BENCHFUN int patchsize = max(int(5 / scale), 2); float ambient[3]; - float max_t = 0.f; + float maxDistance = 0.f; { - array2D R(W, H); + array2D& R = dark; // R and dark can safely use the same buffer, which is faster and reduces memory allocations/deallocations array2D G(W, H); array2D B(W, H); extract_channels(img, R, G, B, patchsize, 1e-1, multiThread); { constexpr int sizecap = 200; - float r = float(W)/float(H); + const float r = static_cast(W) / static_cast(H); const int hh = r >= 1.f ? sizecap : sizecap / r; const int ww = r >= 1.f ? sizecap * r : sizecap; if (W <= ww && H <= hh) { // don't rescale small thumbs array2D D(W, H); - int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread); - max_t = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient); + const int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread); + maxDistance = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient); } else { array2D RR(ww, hh); array2D GG(ww, hh); @@ -297,8 +320,8 @@ BENCHFUN rescaleNearest(B, BB, multiThread); array2D D(ww, hh); - int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread); - max_t = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient); + const int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread); + maxDistance = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient); } } @@ -327,11 +350,11 @@ BENCHFUN guidedFilter(guideB, dark, dark, radius, epsilon, multiThread); if (options.rtSettings.verbose) { - std::cout << "dehaze: max distance is " << max_t << std::endl; + std::cout << "dehaze: max distance is " << maxDistance << std::endl; } const float depth = -float(params->dehaze.depth) / 100.f; - const float t0 = max(1e-3f, std::exp(depth * max_t)); + const float t0 = max(1e-3f, std::exp(depth * maxDistance)); const float teps = 1e-3f; const bool luminance = params->dehaze.luminance; From c83b577dc7b06a8d092c27f60f2e47911e5caba8 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 14:22:14 +0200 Subject: [PATCH 18/31] hasselblad_load_raw: apply changes requested by @Floessie in code review --- rtengine/dcraw.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index 5275c42c1..c9c66a8fb 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -2440,10 +2440,10 @@ void CLASS hasselblad_load_raw() } for (int col = 0; col < raw_width; col += 2) { for (int s = 0; s < tiff_samples * 2; s += 2) { - int len[2]; - for (int c = 0; c < 2; ++c) { - len[c] = ph1_huff(jh.huff[0]); - } + const int len[2]= { + ph1_huff(jh.huff[0]), + ph1_huff(jh.huff[0]) + }; for (int c = 0; c < 2; ++c) { diff[s + c] = hb_bits(len[c]); if ((diff[s + c] & (1 << (len[c] - 1))) == 0) { From 1e41ee62650fc27129a1b602539536434949372d Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 15:54:11 +0200 Subject: [PATCH 19/31] dehaze: fix two cppcheck style warnings, #5456 --- rtengine/ipdehaze.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index fb4f73903..d36c876c2 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -91,7 +91,7 @@ void restore(Imagefloat *rgb, float maxval, bool multithread) } } -int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength) +int get_dark_channel(const array2D &R, const array2D &G, const array2D &B, const array2D &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength) { const int W = R.width(); const int H = R.height(); @@ -142,7 +142,7 @@ int get_dark_channel(const array2D &R, const array2D &G, const arr return (W / patchsize + ((W % patchsize) > 0)) * (H / patchsize + ((H % patchsize) > 0)); } -int get_dark_channel_downsized(const array2D &R, const array2D &G, const array2D &B, array2D &dst, int patchsize, bool multithread) +int get_dark_channel_downsized(const array2D &R, const array2D &G, const array2D &B, const array2D &dst, int patchsize, bool multithread) { const int W = R.width(); const int H = R.height(); From d1ccf27780d8825eadab1effc5fb38af96707497 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 16:32:23 +0200 Subject: [PATCH 20/31] Capture sharpening: add missing history message --- rtdata/languages/default | 1 + 1 file changed, 1 insertion(+) diff --git a/rtdata/languages/default b/rtdata/languages/default index be1638e8e..d54536603 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -770,6 +770,7 @@ HISTORY_MSG_PDSHARPEN_AUTO_RADIUS;CAS - Auto radius HISTORY_MSG_PDSHARPEN_GAMMA;CAS - Gamma HISTORY_MSG_PDSHARPEN_ITERATIONS;CAS - Iterations HISTORY_MSG_PDSHARPEN_RADIUS;CAS - Radius +HISTORY_MSG_PDSHARPEN_RADIUS_OFFSET;CAS - Radius offset HISTORY_MSG_PIXELSHIFT_DEMOSAIC;PS - Demosaic method for motion HISTORY_MSG_PREPROCESS_LINEDENOISE_DIRECTION;Line noise filter direction HISTORY_MSG_PREPROCESS_PDAFLINESFILTER;PDAF lines filter From 8d5c999ad84946f127815416f39cfc389f6d30da Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 16:44:55 +0200 Subject: [PATCH 21/31] hasselblad_load_raw: fix warning --- rtengine/dcraw.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index ad634c1e3..ef209118b 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -2441,8 +2441,8 @@ void CLASS hasselblad_load_raw() for (int col = 0; col < raw_width; col += 2) { for (int s = 0; s < tiff_samples * 2; s += 2) { const int len[2]= { - ph1_huff(jh.huff[0]), - ph1_huff(jh.huff[0]) + static_cast(ph1_huff(jh.huff[0])), + static_cast(ph1_huff(jh.huff[0])) }; for (int c = 0; c < 2; ++c) { diff[s + c] = hb_bits(len[c]); From 57466be795ee871cc04bbbce9b399e1ee05db814 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 20:33:32 +0200 Subject: [PATCH 22/31] Capture sharpening: Label and tooltip changes, also renamed key for favorite tab to capturesharpening --- rtdata/languages/default | 5 ++--- rtgui/pdsharpening.cc | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index d54536603..aabff4bbe 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -770,7 +770,7 @@ HISTORY_MSG_PDSHARPEN_AUTO_RADIUS;CAS - Auto radius HISTORY_MSG_PDSHARPEN_GAMMA;CAS - Gamma HISTORY_MSG_PDSHARPEN_ITERATIONS;CAS - Iterations HISTORY_MSG_PDSHARPEN_RADIUS;CAS - Radius -HISTORY_MSG_PDSHARPEN_RADIUS_OFFSET;CAS - Radius offset +HISTORY_MSG_PDSHARPEN_RADIUS_BOOST;CAS - Corner radius boost HISTORY_MSG_PIXELSHIFT_DEMOSAIC;PS - Demosaic method for motion HISTORY_MSG_PREPROCESS_LINEDENOISE_DIRECTION;Line noise filter direction HISTORY_MSG_PREPROCESS_PDAFLINESFILTER;PDAF lines filter @@ -1802,7 +1802,6 @@ TP_PCVIGNETTE_ROUNDNESS_TOOLTIP;Roundness:\n0 = rectangle,\n50 = fitted ellipse, TP_PCVIGNETTE_STRENGTH;Strength TP_PCVIGNETTE_STRENGTH_TOOLTIP;Filter strength in stops (reached in corners). TP_PDSHARPENING_LABEL;Capture Sharpening -TP_PDSHARPENING_AUTORADIUS_TOOLTIP;If the checkbox is checked, RawTherapee calculates a value based on the raw data of the image. TP_PERSPECTIVE_HORIZONTAL;Horizontal TP_PERSPECTIVE_LABEL;Perspective TP_PERSPECTIVE_VERTICAL;Vertical @@ -2038,7 +2037,7 @@ TP_SHARPENING_LABEL;Sharpening TP_SHARPENING_METHOD;Method TP_SHARPENING_ONLYEDGES;Sharpen only edges TP_SHARPENING_RADIUS;Radius -TP_SHARPENING_RADIUS_OFFSET;Radius corner offset +TP_SHARPENING_RADIUS_BOOST;Corner radius boost TP_SHARPENING_RLD;RL Deconvolution TP_SHARPENING_RLD_AMOUNT;Amount TP_SHARPENING_RLD_DAMPING;Damping diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc index f25e44e69..cd34a466e 100644 --- a/rtgui/pdsharpening.cc +++ b/rtgui/pdsharpening.cc @@ -26,14 +26,14 @@ using namespace rtengine; using namespace rtengine::procparams; -PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDSHARPENING_LABEL"), false, true) +PdSharpening::PdSharpening() : FoldableToolPanel(this, "capturesharpening", M("TP_PDSHARPENING_LABEL"), false, true) { auto m = ProcEventMapper::getInstance(); EvPdShrContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_CONTRAST"); EvPdSharpenGamma = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_GAMMA"); EvPdShrDRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS"); - EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_OFFSET"); + EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_BOOST"); EvPdShrDIterations = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_ITERATIONS"); EvPdShrAutoContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_CONTRAST"); EvPdShrAutoRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_RADIUS"); @@ -42,7 +42,7 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS hb->show(); contrast = Gtk::manage(new Adjuster(M("TP_SHARPENING_CONTRAST"), 0, 200, 1, 10)); contrast->setAdjusterListener(this); - contrast->addAutoButton(M("TP_RAW_DUALDEMOSAICAUTOCONTRAST_TOOLTIP")); + contrast->addAutoButton(); contrast->setAutoValue(true); pack_start(*contrast); @@ -53,9 +53,9 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS Gtk::VBox* rld = Gtk::manage(new Gtk::VBox()); gamma = Gtk::manage(new Adjuster(M("TP_SHARPENING_GAMMA"), 0.5, 6.0, 0.05, 1.00)); dradius = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS"), 0.4, 1.15, 0.01, 0.75)); - dradius->addAutoButton(M("TP_PDSHARPENING_AUTORADIUS_TOOLTIP")); + dradius->addAutoButton(); dradius->setAutoValue(true); - dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_OFFSET"), 0.0, 0.5, 0.01, 0.0)); + dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_BOOST"), 0.0, 0.5, 0.01, 0.0)); diter = Gtk::manage(new Adjuster(M("TP_SHARPENING_RLD_ITERATIONS"), 1, 100, 1, 20)); rld->pack_start(*gamma); rld->pack_start(*dradius); From e62b004434e72c042bd8d05dabae74417482feee Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 22:14:52 +0200 Subject: [PATCH 23/31] dehaze: removed benchmark code --- rtengine/ipdehaze.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc index d36c876c2..e7bf71ba6 100644 --- a/rtengine/ipdehaze.cc +++ b/rtengine/ipdehaze.cc @@ -37,8 +37,6 @@ #include "procparams.h" #include "rescale.h" #include "rt_math.h" -#define BENCHMARK -#include "StopWatch.h" extern Options options; @@ -277,7 +275,7 @@ void ImProcFunctions::dehaze(Imagefloat *img) if (!params->dehaze.enabled || params->dehaze.strength == 0.0) { return; } -BENCHFUN + const float maxChannel = normalize(img, multiThread); const int W = img->getWidth(); From 5b72cc0dd3ce0288408ee0a1b0785f7c603a059e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Tue, 24 Sep 2019 09:59:31 +0200 Subject: [PATCH 24/31] Quote parameters correctly for Linux when spawning (#5463) --- rtgui/extprog.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rtgui/extprog.cc b/rtgui/extprog.cc index a6a9050c0..95c1c937d 100644 --- a/rtgui/extprog.cc +++ b/rtgui/extprog.cc @@ -58,7 +58,7 @@ bool ExtProgAction::execute (const std::vector& fileNames) const } for (const auto& fileName : fileNames) { - cmdLine += " \"" + fileName + "\""; + cmdLine += " " + Glib::shell_quote(fileName); } return ExtProgStore::spawnCommandAsync (cmdLine); @@ -256,7 +256,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName) #else - auto cmdLine = Glib::ustring("gimp \"") + fileName + Glib::ustring("\""); + auto cmdLine = Glib::ustring("gimp ") + Glib::shell_quote(fileName); auto success = spawnCommandAsync (cmdLine); #endif @@ -291,7 +291,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName) #else - cmdLine = Glib::ustring("gimp-remote \"") + fileName + Glib::ustring("\""); + cmdLine = Glib::ustring("gimp-remote ") + Glib::shell_quote(fileName); success = ExtProgStore::spawnCommandAsync (cmdLine); #endif @@ -312,7 +312,7 @@ bool ExtProgStore::openInPhotoshop (const Glib::ustring& fileName) #else - const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + Glib::ustring("\" \"") + fileName + Glib::ustring("\""); + const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + "\" " + Glib::shell_quote(fileName); #endif @@ -334,7 +334,7 @@ bool ExtProgStore::openInCustomEditor (const Glib::ustring& fileName) #else - const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + Glib::ustring("\" \"") + fileName + Glib::ustring("\""); + const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + "\" " + Glib::shell_quote(fileName); return spawnCommandAsync (cmdLine); #endif From 54ca2977c3ee3219d96b84fbdb0a2bbbce4f1af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Tue, 24 Sep 2019 14:54:13 +0200 Subject: [PATCH 25/31] Add missing inits and header to `PdSharpening` --- rtgui/pdsharpening.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc index cd34a466e..3134afa1c 100644 --- a/rtgui/pdsharpening.cc +++ b/rtgui/pdsharpening.cc @@ -18,17 +18,23 @@ */ #include -#include "eventmapper.h" +#include + #include "pdsharpening.h" + +#include "eventmapper.h" #include "options.h" + #include "../rtengine/procparams.h" using namespace rtengine; using namespace rtengine::procparams; -PdSharpening::PdSharpening() : FoldableToolPanel(this, "capturesharpening", M("TP_PDSHARPENING_LABEL"), false, true) +PdSharpening::PdSharpening() : + FoldableToolPanel(this, "capturesharpening", M("TP_PDSHARPENING_LABEL"), false, true), + lastAutoContrast(true), + lastAutoRadius(true) { - auto m = ProcEventMapper::getInstance(); EvPdShrContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_CONTRAST"); EvPdSharpenGamma = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_GAMMA"); From 26bfb526bf8abd87b2f52022bb7537f98ab5a8ba Mon Sep 17 00:00:00 2001 From: Morgan Hardwood Date: Tue, 24 Sep 2019 17:52:12 +0200 Subject: [PATCH 26/31] Deleted obsolete travis.yml file --- .travis.yml.fixme | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 .travis.yml.fixme diff --git a/.travis.yml.fixme b/.travis.yml.fixme deleted file mode 100644 index 0aa85f3b4..000000000 --- a/.travis.yml.fixme +++ /dev/null @@ -1,44 +0,0 @@ -sudo: required -dist: trusty - -language: cpp - -compiler: - - gcc - -os: - - linux - -#branches: -# only: -# - master - -notifications: - irc: - channels: - - "chat.freenode.net#rawtherapee" - skip_join: true - template: - - "%{repository}/%{branch} (%{commit} - %{author}): %{build_url}: %{message}" - email: - on_success: change - on_failure: always - -env: - global: - - OMP_NUM_THREADS=4 - -before_install: - - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ xenial main" - - sudo apt-get -qq update - - sudo apt-get install gcc-6 g++-6 - - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 - - sudo apt-get install build-essential cmake curl git libbz2-dev libcanberra-gtk3-dev libexiv2-dev libexpat-dev libfftw3-dev libglibmm-2.4-dev libgtk-3-dev libgtkmm-3.0-dev libiptcdata0-dev libjpeg8-dev liblcms2-dev libpng12-dev libsigc++-2.0-dev libtiff5-dev zlib1g-dev - -before_script: - - mkdir build - - cd build - - cmake -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" -DWITH_LTO="OFF" -DPROC_TARGET_NUMBER="2" .. - -script: make From 79b3ff8e6e559e85c5eec1a80460f5da4212447e Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 24 Sep 2019 18:59:02 +0200 Subject: [PATCH 27/31] capture sharpening: allow negative corner boost --- rtengine/capturesharpening.cc | 17 ++++++++++------- rtgui/pdsharpening.cc | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index e08243713..ef8a55a6e 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -581,14 +581,17 @@ BENCHFUN gauss5x5mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel5); } } else { - if (sigmaCornerOffset > 0.0) { - float lkernel7[7][7]; + if (sigmaCornerOffset != 0.0) { const float distance = sqrt(rtengine::SQR(i + tileSize / 2 - H / 2) + rtengine::SQR(j + tileSize / 2 - W / 2)); - compute7x7kernel(sigma + distanceFactor * distance, lkernel7); - for (int k = 0; k < iterations - 1; ++k) { - // apply 7x7 gaussian blur and divide luminance by result of gaussian blur - gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7); - gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7); + const float sigmaTile = sigma + distanceFactor * distance; + if (sigmaTile >= 0.4f) { + float lkernel7[7][7]; + compute7x7kernel(sigma + distanceFactor * distance, lkernel7); + for (int k = 0; k < iterations - 1; ++k) { + // apply 7x7 gaussian blur and divide luminance by result of gaussian blur + gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7); + gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7); + } } } else { for (int k = 0; k < iterations; ++k) { diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc index cd34a466e..759461ba5 100644 --- a/rtgui/pdsharpening.cc +++ b/rtgui/pdsharpening.cc @@ -55,7 +55,7 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "capturesharpening", M("T dradius = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS"), 0.4, 1.15, 0.01, 0.75)); dradius->addAutoButton(); dradius->setAutoValue(true); - dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_BOOST"), 0.0, 0.5, 0.01, 0.0)); + dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_BOOST"), -0.5, 0.5, 0.01, 0.0)); diter = Gtk::manage(new Adjuster(M("TP_SHARPENING_RLD_ITERATIONS"), 1, 100, 1, 20)); rld->pack_start(*gamma); rld->pack_start(*dradius); From 851a12e165c50333dcd779f78ea0658c38798972 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 24 Sep 2019 19:18:26 +0200 Subject: [PATCH 28/31] capture sharpening: more clear variable names --- rtengine/capturesharpening.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index ef8a55a6e..6720d9abc 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -532,9 +532,9 @@ BENCHFUN constexpr int tileSize = 194; constexpr int border = 5; constexpr int fullTileSize = tileSize + 2 * border; - const float maxRadius = std::min(1.15f, sigma + sigmaCornerOffset); - const float maxDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f)); - const float distanceFactor = (maxRadius - sigma) / maxDistance; + const float cornerRadius = std::min(1.15f, sigma + sigmaCornerOffset); + const float cornerDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f)); + const float distanceFactor = (cornerRadius - sigma) / cornerDistance; double progress = startVal; const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H); From 5a19632475eda7958d93a9c9ce94fa3841fca7b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Wed, 25 Sep 2019 11:33:56 +0200 Subject: [PATCH 29/31] Fix non-SSE2 build --- rtengine/color.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtengine/color.h b/rtengine/color.h index 1031ca150..97835ba10 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -210,10 +210,12 @@ public: return r * workingspace[1][0] + g * workingspace[1][1] + b * workingspace[1][2]; } +#ifdef __SSE2__ static vfloat rgbLuminance(vfloat r, vfloat g, vfloat b, const vfloat workingspace[3]) { return r * workingspace[0] + g * workingspace[1] + b * workingspace[2]; } +#endif /** * @brief Convert red/green/blue to L*a*b From 277c494fefc69a63ddd970af217ef4499616bdf5 Mon Sep 17 00:00:00 2001 From: Morgan Hardwood Date: Wed, 25 Sep 2019 11:54:30 +0200 Subject: [PATCH 30/31] Downgrade desktop file to Version=1.0, #5470 Version=1.1 caused Travis CI builds to fail. Version 1.1 was necessary only to standardize the Keywords key, but this key is a potential pitfall and unnecessary. --- rtdata/rawtherapee.desktop.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rtdata/rawtherapee.desktop.in b/rtdata/rawtherapee.desktop.in index c6c675c4d..107ce2a42 100644 --- a/rtdata/rawtherapee.desktop.in +++ b/rtdata/rawtherapee.desktop.in @@ -1,6 +1,6 @@ [Desktop Entry] Type=Application -Version=1.1 +Version=1.0 Name=RawTherapee GenericName=Raw Photo Editor GenericName[cs]=Editor raw obrázků @@ -16,5 +16,4 @@ Exec=rawtherapee %f Terminal=false MimeType=image/jpeg;image/png;image/tiff;image/x-adobe-dng;image/x-canon-cr2;image/x-canon-crf;image/x-canon-crw;image/x-fuji-raf;image/x-hasselblad-3fr;image/x-hasselblad-fff;image/x-jpg;image/x-kodak-dcr;image/x-kodak-k25;image/x-kodak-kdc;image/x-leaf-mos;image/x-leica-rwl;image/x-mamiya-mef;image/x-minolta-mrw;image/x-nikon-nef;image/x-nikon-nrw;image/x-olympus-orf;image/x-panasonic-raw;image/x-panasonic-rw2;image/x-pentax-pef;image/x-pentax-raw;image/x-phaseone-iiq;image/x-raw;image/x-rwz;image/x-samsung-srw;image/x-sigma-x3f;image/x-sony-arq;image/x-sony-arw;image/x-sony-sr2;image/x-sony-srf;image/x-tif; Categories=Graphics;Photography;2DGraphics;RasterGraphics;GTK; -Keywords=raw;photo;photography;develop;pp3;graphics; StartupWMClass=rawtherapee From b86b7a4af6f9b7248d9d3af0bc77395475a7721c Mon Sep 17 00:00:00 2001 From: Morgan Hardwood Date: Wed, 25 Sep 2019 12:08:43 +0200 Subject: [PATCH 31/31] Reverted Keywords key in desktop file, #5470 --- rtdata/rawtherapee.desktop.in | 1 + 1 file changed, 1 insertion(+) diff --git a/rtdata/rawtherapee.desktop.in b/rtdata/rawtherapee.desktop.in index 107ce2a42..b059e7d6a 100644 --- a/rtdata/rawtherapee.desktop.in +++ b/rtdata/rawtherapee.desktop.in @@ -16,4 +16,5 @@ Exec=rawtherapee %f Terminal=false MimeType=image/jpeg;image/png;image/tiff;image/x-adobe-dng;image/x-canon-cr2;image/x-canon-crf;image/x-canon-crw;image/x-fuji-raf;image/x-hasselblad-3fr;image/x-hasselblad-fff;image/x-jpg;image/x-kodak-dcr;image/x-kodak-k25;image/x-kodak-kdc;image/x-leaf-mos;image/x-leica-rwl;image/x-mamiya-mef;image/x-minolta-mrw;image/x-nikon-nef;image/x-nikon-nrw;image/x-olympus-orf;image/x-panasonic-raw;image/x-panasonic-rw2;image/x-pentax-pef;image/x-pentax-raw;image/x-phaseone-iiq;image/x-raw;image/x-rwz;image/x-samsung-srw;image/x-sigma-x3f;image/x-sony-arq;image/x-sony-arw;image/x-sony-sr2;image/x-sony-srf;image/x-tif; Categories=Graphics;Photography;2DGraphics;RasterGraphics;GTK; +Keywords=raw;photo;photography;develop;pp3;graphics; StartupWMClass=rawtherapee