dehaze: further speedup, stolen from ART, thanks @agriggio, #5456

2019-09-21 21:33:05 +02:00
parent 83a8ca8ef5
commit a7cc59c91d
1 changed files with 49 additions and 42 deletions
--- a/rtengine/ipdehaze.cc
+++ b/rtengine/ipdehaze.cc
@@ -39,7 +39,7 @@
 #include "rt_math.h"
 #define BENCHMARK
 #include "StopWatch.h"
-
+#include "rescale.h"
 extern Options options;
 namespace rtengine {
@@ -83,24 +83,24 @@ int get_dark_channel_downsized(const array2D<float> &R, const array2D<float> &G,
    #pragma omp parallel for if (multithread)
 #endif
    for (int y = 0; y < H; y += patchsize) {
        int yy = y / patchsize;
        const int pH = min(y + patchsize, H);
-        for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) {
+        for (int x = 0; x < W; x += patchsize) {
            float val = RT_INFINITY_F;
            const int pW = min(x + patchsize, W);
-            for (int xp = x; xp < pW; ++xp) {
+            for (int xx = x; xx < pW; ++xx) {
-                for (int yp = y; yp < pH; ++yp) {
+                for (int yy = y; yy < pH; ++yy) {
-                    val = min(val, R[yp][xp], G[yp][xp], B[yp][xp]);
+                    val = min(val, R[yy][xx], G[yy][xx], B[yy][xx]);
                }
            }
-            dst[yy][xx] = val;
+            for (int yy = y; yy < pH; ++yy) {
                std::fill(dst[yy] + x, dst[yy] + pW, val);
            }
        }
    }
    return (W / patchsize + ((W % patchsize) > 0)) *  (H / patchsize + ((H % patchsize) > 0));
 }
 float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, const array2D<float> &B, const array2D<float> &dark, int patchsize, int npatches, float ambient[3])
 {
    const int W = R.width();
@@ -109,10 +109,10 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
    float darklim = RT_INFINITY_F;
    {
        std::vector<float> p;
-        for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) {
+        for (int y = 0; y < H; y += patchsize) {
-            for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) {
+            for (int x = 0; x < W; x += patchsize) {
-                if (!OOG(dark[yy][xx], 1.f - 1e-5f)) {
+                if (!OOG(dark[y][x], 1.f - 1e-5f)) {
-                    p.push_back(dark[yy][xx]);
+                    p.push_back(dark[y][x]);
                }
            }
        }
@@ -124,9 +124,9 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
    std::vector<std::pair<int, int>> patches;
    patches.reserve(npatches);
-    for (int y = 0, yy = 0; y < H; y += patchsize, ++yy) {
+    for (int y = 0; y < H; y += patchsize) {
-        for (int x = 0, xx = 0; x < W; x += patchsize, ++xx) {
+        for (int x = 0; x < W; x += patchsize) {
-            if (dark[yy][xx] >= darklim && !OOG(dark[yy][xx], 1.f)) {
+            if (dark[y][x] >= darklim && !OOG(dark[y][x], 1.f)) {
                patches.push_back(std::make_pair(x, y));
            }
        }
@@ -142,7 +142,7 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
        std::vector<float> l;
        l.reserve(patches.size() * patchsize * patchsize);
-        for (const auto &p : patches) {
+        for (auto &p : patches) {
            const int pW = min(p.first+patchsize, W);
            const int pH = min(p.second+patchsize, H);
@@ -159,19 +159,15 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
    double rr = 0, gg = 0, bb = 0;
    int n = 0;
-#ifdef _OPENMP
+    for (auto &p : patches) {
    #pragma omp parallel for schedule(dynamic) reduction(+:rr,gg,bb,n)
 #endif
    for (size_t i = 0; i < patches.size(); ++i) {
        const auto &p = patches[i];
        const int pW = min(p.first+patchsize, W);
        const int pH = min(p.second+patchsize, H);
        for (int y = p.second; y < pH; ++y) {
            for (int x = p.first; x < pW; ++x) {
-                const float r = R[y][x];
+                float r = R[y][x];
-                const float g = G[y][x];
+                float g = G[y][x];
-                const float b = B[y][x];
+                float b = B[y][x];
                if (r + g + b >= bright_lim) {
                    rr += r;
                    gg += g;
@@ -181,7 +177,6 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
            }
        }
    }
    n = std::max(n, 1);
    ambient[0] = rr / n;
    ambient[1] = gg / n;
@@ -191,7 +186,6 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
    return darklim > 0 ? -1.125f * std::log(darklim) : std::log(std::numeric_limits<float>::max()) / 2;
 }
 void extract_channels(Imagefloat *img, array2D<float> &r, array2D<float> &g, array2D<float> &b, int radius, float epsilon, bool multithread)
 {
    const int W = img->getWidth();
@@ -238,11 +232,32 @@ BENCHFUN
        array2D<float> B(W, H);
        extract_channels(img, R, G, B, patchsize, 1e-1, multiThread);
-        patchsize = max(max(W, H) / 600, 2);
+        {
-        array2D<float> darkDownsized(W / patchsize + 1, H / patchsize + 1);
+            constexpr int sizecap = 200;
-        const int npatches = get_dark_channel_downsized(R, G, B, darkDownsized, patchsize, multiThread);
+            float r = float(W)/float(H);
            const int hh = r >= 1.f ? sizecap : sizecap / r;
            const int ww = r >= 1.f ? sizecap * r : sizecap;
-        max_t = estimate_ambient_light(R, G, B, darkDownsized, patchsize, npatches, ambient);
+            if (W <= ww && H <= hh) {
                // don't rescale small thumbs
                array2D<float> D(W, H);
                int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread);
                max_t = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient);
            } else {
                array2D<float> RR(ww, hh);
                array2D<float> GG(ww, hh);
                array2D<float> BB(ww, hh);
                rescaleNearest(R, RR, multiThread);
                rescaleNearest(G, GG, multiThread);
                rescaleNearest(B, BB, multiThread);
                array2D<float> D(ww, hh);
                int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread);
                max_t = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient);
            }
        }
        patchsize = max(max(W, H) / 600, 2);
        if (options.rtSettings.verbose) {
            std::cout << "dehaze: ambient light is "
@@ -250,14 +265,6 @@ BENCHFUN
                      << std::endl;
        }
        if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) {
            if (options.rtSettings.verbose) {
                std::cout << "dehaze: no haze detected" << std::endl;
            }
            img->normalizeFloatTo65535();
            return; // probably no haze at all
        }
        get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread, strength);
    }