merge with dev

2020-01-25 08:14:14 +01:00
parent ba7a61c391 227c2fbba7
commit 83ebcff455
12 changed files with 76 additions and 89 deletions
--- a/rtengine/array2D.h
+++ b/rtengine/array2D.h
@@ -188,6 +188,16 @@ public:
        }
    }

+    void fill(const T val, bool multiThread = false)
+    {
+#ifdef _OPENMP
+        #pragma omp parallel for if(multiThread)
+#endif
+        for (int i = 0; i < x * y; ++i) {
+            data[i] = val;
+        }
+    }
+
    void free()
    {
        if ((owner) && (data)) {
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -99,31 +99,7 @@ void compute3x3kernel(float sigma, float kernel[3][3]) {
    }
 }

-inline void initTile(float** dst, const int tileSize)
-{
-
-    // first rows
-    for (int i = 0; i < 3; ++i) {
-        for (int j = 0; j < tileSize; ++j) {
-            dst[i][j] = 1.f;
-        }
-    }
-
-    // left and right border
-    for (int i = 3; i < tileSize - 3; ++i) {
-        dst[i][0] = dst[i][1] = dst[i][2] = 1.f;
-        dst[i][tileSize - 3] = dst[i][tileSize - 2] = dst[i][tileSize - 1] = 1.f;
-    }
-
-    // last rows
-    for (int i = tileSize - 3 ; i < tileSize; ++i) {
-        for (int j = 0; j < tileSize; ++j) {
-            dst[i][j] = 1.f;
-        }
-    }
-}
-
-inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[3][3])
+void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[3][3])
 {

    const float c11 = kernel[0][0];
@@ -145,7 +121,7 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES
    }
 }

-inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[5][5])
+void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[5][5])
 {

    const float c21 = kernel[0][1];
@@ -173,7 +149,7 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES
    }
 }

-inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[7][7])
+void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int tileSize, const float kernel[7][7])
 {

    const float c31 = kernel[0][2];
@@ -207,7 +183,7 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST
    }
 }

-inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[3][3])
+void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[3][3])
 {
    const float c11 = kernel[0][0];
    const float c10 = kernel[0][1];
@@ -229,7 +205,7 @@ inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int t

 }

-inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[5][5])
+void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[5][5])
 {

    const float c21 = kernel[0][1];
@@ -257,7 +233,7 @@ inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int
    }
 }

-inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[7][7])
+void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int tileSize, const float kernel[7][7])
 {

    const float c31 = kernel[0][2];
@@ -578,9 +554,9 @@ BENCHFUN
        int progresscounter = 0;
        array2D<float> tmpIThr(fullTileSize, fullTileSize);
        array2D<float> tmpThr(fullTileSize, fullTileSize);
+        tmpThr.fill(1.f);
        array2D<float> lumThr(fullTileSize, fullTileSize);
        array2D<float> iterCheck(tileSize, tileSize);
-        initTile(tmpThr, fullTileSize);
 #ifdef _OPENMP
        #pragma omp for schedule(dynamic,16) collapse(2)
 #endif
@@ -705,13 +681,13 @@ BENCHFUN
                    // special handling for small tiles at end of row or column
                    for (int k = border, ii = endOfCol ? H - fullTileSize : i - border; k < fullTileSize - border; ++k) {
                        for (int l = border, jj = endOfRow ? W - fullTileSize : j - border; l < fullTileSize - border; ++l) {
-                            luminance[ii + k][jj + l] = rtengine::intp(blend[ii + k][jj + l], std::max(tmpIThr[k][l], 0.0f), luminance[ii + k][jj + l]);
+                            luminance[ii + k][jj + l] = rtengine::intp(blend[ii + k][jj + l], tmpIThr[k][l], luminance[ii + k][jj + l]);
                        }
                    }
                } else {
                    for (int ii = border; ii < fullTileSize - border; ++ii) {
                        for (int jj = border; jj < fullTileSize - border; ++jj) {
-                            luminance[i + ii - border][j + jj - border] = rtengine::intp(blend[i + ii - border][j + jj - border], std::max(tmpIThr[ii][jj], 0.0f), luminance[i + ii - border][j + jj - border]);
+                            luminance[i + ii - border][j + jj - border] = rtengine::intp(blend[i + ii - border][j + jj - border], tmpIThr[ii][jj], luminance[i + ii - border][j + jj - border]);
                        }
                    }
                }
--- a/rtengine/iplab2rgb.cc
+++ b/rtengine/iplab2rgb.cc
@@ -38,14 +38,8 @@ namespace {

 inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
 {
-    for (int j = 0, iy = 0; j < W; ++j) {
-        float r = src[iy] * MAXVALF;
-        float g = src[iy+1] * MAXVALF;
-        float b = src[iy+2] * MAXVALF;
-        dst[iy] = uint16ToUint8Rounded(CLIP(r));
-        dst[iy+1] = uint16ToUint8Rounded(CLIP(g));
-        dst[iy+2] = uint16ToUint8Rounded(CLIP(b));
-        iy += 3;
+    for (int j = 0; j < W * 3; ++j) {
+        dst[j] = uint16ToUint8Rounded(CLIP(src[j] * MAXVALF));
    }
 }

@@ -90,8 +84,8 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image)
 {
    if (monitorTransform) {

-        int W = lab->W;
-        int H = lab->H;
+        const int W = lab->W;
+        const int H = lab->H;
        unsigned char * data = image->data;

        // cmsDoTransform is relatively expensive
@@ -100,18 +94,19 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image)
 #endif
        {
            AlignedBuffer<float> pBuf(3 * lab->W);
-            AlignedBuffer<float> mBuf(3 * lab->W);

+            AlignedBuffer<float> mBuf;
            AlignedBuffer<float> gwBuf1;
            AlignedBuffer<float> gwBuf2;

            if (gamutWarning) {
                gwBuf1.resize(3 * lab->W);
                gwBuf2.resize(3 * lab->W);
+                mBuf.resize(3 * lab->W);
            }

            float *buffer = pBuf.data;
-            float *outbuffer = mBuf.data;
+            float *outbuffer = gamutWarning ? mBuf.data : pBuf.data; // make in place transformations when gamutWarning is not needed

 #ifdef _OPENMP
            #pragma omp for schedule(dynamic,16)
@@ -132,7 +127,7 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image)
                    buffer[iy++] = rb[j] / 327.68f;
                }

-                cmsDoTransform (monitorTransform, buffer, outbuffer, W);
+                cmsDoTransform(monitorTransform, buffer, outbuffer, W);
                copyAndClampLine(outbuffer, data + ix, W);

                if (gamutWarning) {
--- a/rtengine/sleef.h
+++ b/rtengine/sleef.h
@@ -929,7 +929,7 @@ __inline float mulsignf(float x, float y) {
    return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31)));
 }

-__inline float signf(float d) { return copysign(1, d); }
+__inline float signf(float d) { return std::copysign(1.f, d); }
 __inline float mlaf(float x, float y, float z) { return x * y + z; }

 __inline int xisnanf(float x) { return x != x; }