Merge branch 'dev' into microcontrast

2018-05-24 00:19:46 +02:00
parent fcfd813714 bac4533160
commit 252fa441c4
5 changed files with 129 additions and 81 deletions
--- a/rtdata/languages/Deutsch
+++ b/rtdata/languages/Deutsch
@@ -56,6 +56,7 @@
 #55 06.04.2018 Erweiterung (TooWaBoo) RT 5.4
 #56 27.04.2018 Erweiterung (TooWaBoo) RT 5.4
 #57 17.05.2018 Erweiterung (TooWaBoo) RT 5.4
+#58 19.05.2018 Erweiterung (TooWaBoo) RT 5.4

 ABOUT_TAB_BUILD;Version
 ABOUT_TAB_CREDITS;Danksagungen
@@ -2278,11 +2279,11 @@ ZOOMPANEL_ZOOMOUT;Herauszoomen\nTaste: <b>-</b>
 ! Untranslated keys follow; remove the ! prefix after an entry is translated.
 !!!!!!!!!!!!!!!!!!!!!!!!!

-!ADJUSTER_RESET_TO_DEFAULT;<b>Click</b> - reset to default value.\n<b>Ctrl</b>+<b>click</b> - reset to initial value.
-!GENERAL_RESET;Reset
-!HISTORY_MSG_235;B&amp;W - CM - Auto
-!HISTORY_MSG_237;B&amp;W - CM
-!HISTORY_MSG_273;CT - Color Balance SMH
-!HISTORY_MSG_392;W - Residual - Color Balance
-!TP_BWMIX_MIXC;Channel Mixer
-!TP_BWMIX_NEUTRAL;Reset
+ADJUSTER_RESET_TO_DEFAULT;<b>Klick</b> - Auf Standardwert zurücksetzen.\n<b>Strg</b> + <b>Klick</b> - Auf Initialwert zurücksetzen.
+GENERAL_RESET;Zurücksetzen
+HISTORY_MSG_235;(Schwarz/Weiß)\nAuto-Kanalmixer
+HISTORY_MSG_237;(Schwarz/Weiß) - Mixer
+HISTORY_MSG_273;(Farbanpassungen)\nFarbausgleich\nRegler zurücksetzen
+HISTORY_MSG_392;(Wavelet) - Restbild\nFarbausgleich
+TP_BWMIX_MIXC;Kanalmixer
+TP_BWMIX_NEUTRAL;Zurücksetzen
--- a/rtengine/CA_correct_RT.cc
+++ b/rtengine/CA_correct_RT.cc
@@ -27,7 +27,7 @@
 #include "rawimagesource.h"
 #include "rt_math.h"
 #include "median.h"
-
+#include "StopWatch.h"
 namespace {

 bool LinEqSolve(int nDim, double* pfMatr, double* pfVect, double* pfSolution)
@@ -111,7 +111,7 @@ bool LinEqSolve(int nDim, double* pfMatr, double* pfVect, double* pfSolution)
 using namespace std;
 using namespace rtengine;

-void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const double cablue, const double caautostrength, array2D<float> &rawData)
+float* RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const double cablue, const double caautostrength, array2D<float> &rawData, double *fitParamsTransfer, bool fitParamsIn, bool fitParamsOut, float *buffer, bool freeBuffer)
 {
 // multithreaded and vectorized by Ingo Weyrich
    constexpr int ts = 128;
@@ -124,7 +124,7 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
        for(int j = 0; j < 2; j++)
            if(FC(i, j) == 3) {
                printf("CA correction supports only RGB Colour filter arrays\n");
-                return;
+                return buffer;
            }

    volatile double progress = 0.0;
@@ -135,17 +135,6 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const

    // local variables
    const int width = W + (W & 1), height = H;
-    //temporary array to store simple interpolation of G
-    float *Gtmp = (float (*)) malloc ((height * width) / 2 * sizeof * Gtmp);
-
-    // temporary array to avoid race conflicts, only every second pixel needs to be saved here
-    float *RawDataTmp = (float*) malloc( (height * width) * sizeof(float) / 2);
-
-    float blockave[2][2] = {{0, 0}, {0, 0}}, blocksqave[2][2] = {{0, 0}, {0, 0}}, blockdenom[2][2] = {{0, 0}, {0, 0}}, blockvar[2][2];
-
-    // Because we can't break parallel processing, we need a switch do handle the errors
-    bool processpasstwo = true;
-
    constexpr int border = 8;
    constexpr int border2 = 16;

@@ -154,12 +143,36 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
    const int vblsz = ceil((float)(height + border2) / (ts - border2) + 2 + vz1);
    const int hblsz = ceil((float)(width + border2) / (ts - border2) + 2 + hz1);

+    //temporary array to store simple interpolation of G
+    if (!buffer) {
+        buffer = static_cast<float*>(malloc ((height * width + vblsz * hblsz * (2 * 2 + 1)) * sizeof(float)));
+    }
+    float *Gtmp = buffer;
+    float *RawDataTmp = buffer + (height * width) / 2;
+
    //block CA shift values and weight assigned to block
-    float* const blockwt = static_cast<float*>(calloc(vblsz * hblsz * (2 * 2 + 1), sizeof(float)));
+    float *const blockwt = buffer + (height * width);
+    memset(blockwt, 0, vblsz * hblsz * (2 * 2 + 1) * sizeof(float));
    float (*blockshifts)[2][2] = (float (*)[2][2])(blockwt + vblsz * hblsz);

-    double fitparams[2][2][16];
+    float blockave[2][2] = {{0, 0}, {0, 0}}, blocksqave[2][2] = {{0, 0}, {0, 0}}, blockdenom[2][2] = {{0, 0}, {0, 0}}, blockvar[2][2];

+    // Because we can't break parallel processing, we need a switch to handle the errors
+    bool processpasstwo = true;
+
+    double fitparams[2][2][16];
+    const bool fitParamsSet = fitParamsTransfer && fitParamsIn;
+    if(autoCA && fitParamsSet) {
+        // use stored parameters
+        int index = 0;
+        for(int c = 0; c < 2; ++c) {
+            for(int d = 0; d < 2; ++d) {
+                for(int e = 0; e < 16; ++e) {
+                    fitparams[c][d][e] = fitParamsTransfer[index++];
+                }
+            }
+        }
+    }
    //order of 2d polynomial fit (polyord), and numpar=polyord^2
    int polyord = 4, numpar = 16;

@@ -174,20 +187,16 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const

        int shifthfloor[3], shiftvfloor[3], shifthceil[3], shiftvceil[3];

-        //local quadratic fit to shift data within a tile
-        float   coeff[2][3][2];
-        //measured CA shift parameters for a tile
-        float   CAshift[2][2];
        //polynomial fit coefficients
        //residual CA shift amount within a plaquette
        float   shifthfrac[3], shiftvfrac[3];
-        //per thread data for evaluation of block CA shift variance
-        float   blockavethr[2][2] = {{0, 0}, {0, 0}}, blocksqavethr[2][2] = {{0, 0}, {0, 0}}, blockdenomthr[2][2] = {{0, 0}, {0, 0}};

        // assign working space
        constexpr int buffersize = sizeof(float) * ts * ts + 8 * sizeof(float) * ts * tsh + 8 * 64 + 63;
-        char *buffer = (char *) malloc(buffersize);
-        char *data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
+        constexpr int buffersizePassTwo = sizeof(float) * ts * ts + 4 * sizeof(float) * ts * tsh + 4 * 64 + 63;
+        char * const bufferThr = (char *) malloc((autoCA && !fitParamsSet) ? buffersize : buffersizePassTwo);
+
+        char * const data = (char*)( ( uintptr_t(bufferThr) + uintptr_t(63)) / 64 * 64);

        // shift the beginning of all arrays but the first by 64 bytes to avoid cache miss conflicts on CPUs which have <= 4-way associative L1-Cache

@@ -197,6 +206,7 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
        rgb[1]         = (float (*)) (data + sizeof(float) * ts * tsh + 1 * 64);
        rgb[2]         = (float (*)) (data + sizeof(float) * (ts * ts + ts * tsh) + 2 * 64);

+        if (autoCA && !fitParamsSet) {
            //high pass filter for R/B in vertical direction
            float *rbhpfh  = (float (*)) (data + 2 * sizeof(float) * ts * ts + 3 * 64);
            //high pass filter for R/B in horizontal direction
@@ -209,17 +219,20 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
            float *grblpfh = (float (*)) (data + 4 * sizeof(float) * ts * ts + 7 * 64);
            //low pass filter for colour differences in vertical direction
            float *grblpfv = (float (*)) (data + 4 * sizeof(float) * ts * ts + sizeof(float) * ts * tsh + 8 * 64);
-        float *grbdiff = rbhpfh; // there is no overlap in buffer usage => share
-        //green interpolated to optical sample points for R/B
-        float *gshift  = rbhpfv; // there is no overlap in buffer usage => share
-
-
-        if (autoCA) {
            // Main algorithm: Tile loop calculating correction parameters per tile
+
+            //local quadratic fit to shift data within a tile
+            float coeff[2][3][2];
+            //measured CA shift parameters for a tile
+            float CAshift[2][2];
+
+            //per thread data for evaluation of block CA shift variance
+            float   blockavethr[2][2] = {{0, 0}, {0, 0}}, blocksqavethr[2][2] = {{0, 0}, {0, 0}}, blockdenomthr[2][2] = {{0, 0}, {0, 0}};
+
            #pragma omp for collapse(2) schedule(dynamic) nowait
            for (int top = -border ; top < height; top += ts - border2)
                for (int left = -border; left < width - (W & 1); left += ts - border2) {
-                    memset(buffer, 0, buffersize);
+                    memset(bufferThr, 0, buffersize);
                    const int vblock = ((top + border) / (ts - border2)) + 1;
                    const int hblock = ((left + border) / (ts - border2)) + 1;
                    const int bottom = min(top + ts, height + border);
@@ -741,7 +754,6 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
                                    processpasstwo = false;
                                }
                            }
-
                }

                //fitparams[polyord*i+j] gives the coefficients of (vblock^i hblock^j) in a polynomial fit for i,j<=4
@@ -752,11 +764,14 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const

        // Main algorithm: Tile loop
        if(processpasstwo) {
+            float *grbdiff = (float (*)) (data + 2 * sizeof(float) * ts * ts + 3 * 64); // there is no overlap in buffer usage => share
+            //green interpolated to optical sample points for R/B
+            float *gshift  = (float (*)) (data + 2 * sizeof(float) * ts * ts + sizeof(float) * ts * tsh + 4 * 64); // there is no overlap in buffer usage => share
            #pragma omp for schedule(dynamic) collapse(2) nowait

            for (int top = -border; top < height; top += ts - border2)
                for (int left = -border; left < width - (W & 1); left += ts - border2) {
-                    memset(buffer, 0, buffersize);
+                    memset(bufferThr, 0, buffersizePassTwo);
                    float lblockshifts[2][2];
                    const int vblock = ((top + border) / (ts - border2)) + 1;
                    const int hblock = ((left + border) / (ts - border2)) + 1;
@@ -902,25 +917,42 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
                    //end of border fill
                    // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-                    if (!autoCA) {
-                        //manual CA correction; use red/blue slider values to set CA shift parameters
-                        for (int rr = 3; rr < rr1 - 3; rr++)
-                            for (int cc = 3, indx = rr * ts + cc; cc < cc1 - 3; cc++, indx++) {
-                                int c = FC(rr, cc);
+                    if (!autoCA || fitParamsIn) {
+#ifdef __SSE2__
+                        const vfloat onev = F2V(1.f);
+                        const vfloat epsv = F2V(eps);
+#endif

-                                if (c != 1) {
+                        //manual CA correction or stored fit parameters: interpolate G at R/B grid points here; for manual correction the red/blue slider values set the CA shift parameters below
+                        for (int rr = 3; rr < rr1 - 3; rr++) {
+                            int cc = 3 + FC(rr, 1), c = FC(rr,cc), indx = rr * ts + cc;
+#ifdef __SSE2__
+                            for (; cc < cc1 - 10; cc += 8, indx += 8) {
+                                //compute directional weights using image gradients
+                                vfloat val1v = epsv + vabsf(LC2VFU(rgb[1][(rr + 1) * ts + cc]) - LC2VFU(rgb[1][(rr - 1) * ts + cc]));
+                                vfloat val2v = epsv + vabsf(LC2VFU(rgb[1][indx + 1]) - LC2VFU(rgb[1][indx - 1]));
+                                vfloat wtuv = onev / SQRV(val1v + vabsf(LVFU(rgb[c][(rr * ts + cc) >> 1]) - LVFU(rgb[c][((rr - 2) * ts + cc) >> 1])) + vabsf(LC2VFU(rgb[1][(rr - 1) * ts + cc]) - LC2VFU(rgb[1][(rr - 3) * ts + cc])));
+                                vfloat wtdv = onev / SQRV(val1v + vabsf(LVFU(rgb[c][(rr * ts + cc) >> 1]) - LVFU(rgb[c][((rr + 2) * ts + cc) >> 1])) + vabsf(LC2VFU(rgb[1][(rr + 1) * ts + cc]) - LC2VFU(rgb[1][(rr + 3) * ts + cc])));
+                                vfloat wtlv = onev / SQRV(val2v + vabsf(LVFU(rgb[c][indx >> 1]) - LVFU(rgb[c][(indx - 2) >> 1])) + vabsf(LC2VFU(rgb[1][indx - 1]) - LC2VFU(rgb[1][indx - 3])));
+                                vfloat wtrv = onev / SQRV(val2v + vabsf(LVFU(rgb[c][indx >> 1]) - LVFU(rgb[c][(indx + 2) >> 1])) + vabsf(LC2VFU(rgb[1][indx + 1]) - LC2VFU(rgb[1][indx + 3])));
+
+                                //store in rgb array the interpolated G value at R/B grid points using directional weighted average
+                                STC2VFU(rgb[1][indx], (wtuv * LC2VFU(rgb[1][indx - v1]) + wtdv * LC2VFU(rgb[1][indx + v1]) + wtlv * LC2VFU(rgb[1][indx - 1]) + wtrv * LC2VFU(rgb[1][indx + 1])) / (wtuv + wtdv + wtlv + wtrv));
+                            }
+#endif
+                            for (; cc < cc1 - 3; cc += 2, indx += 2) {
                                //compute directional weights using image gradients
                                float wtu = 1.f / SQR(eps + fabsf(rgb[1][(rr + 1) * ts + cc] - rgb[1][(rr - 1) * ts + cc]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][((rr - 2) * ts + cc) >> 1]) + fabsf(rgb[1][(rr - 1) * ts + cc] - rgb[1][(rr - 3) * ts + cc]));
-                                    float wtd = 1.f / SQR(eps + fabsf(rgb[1][(rr - 1) * ts + cc] - rgb[1][(rr + 1) * ts + cc]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][((rr + 2) * ts + cc) >> 1]) + fabsf(rgb[1][(rr + 1) * ts + cc] - rgb[1][(rr + 3) * ts + cc]));
+                                float wtd = 1.f / SQR(eps + fabsf(rgb[1][(rr + 1) * ts + cc] - rgb[1][(rr - 1) * ts + cc]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][((rr + 2) * ts + cc) >> 1]) + fabsf(rgb[1][(rr + 1) * ts + cc] - rgb[1][(rr + 3) * ts + cc]));
                                float wtl = 1.f / SQR(eps + fabsf(rgb[1][rr * ts + cc + 1] - rgb[1][rr * ts + cc - 1]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][(rr * ts + cc - 2) >> 1]) + fabsf(rgb[1][rr * ts + cc - 1] - rgb[1][rr * ts + cc - 3]));
-                                    float wtr = 1.f / SQR(eps + fabsf(rgb[1][rr * ts + cc - 1] - rgb[1][rr * ts + cc + 1]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][(rr * ts + cc + 2) >> 1]) + fabsf(rgb[1][rr * ts + cc + 1] - rgb[1][rr * ts + cc + 3]));
+                                float wtr = 1.f / SQR(eps + fabsf(rgb[1][rr * ts + cc + 1] - rgb[1][rr * ts + cc - 1]) + fabsf(rgb[c][(rr * ts + cc) >> 1] - rgb[c][(rr * ts + cc + 2) >> 1]) + fabsf(rgb[1][rr * ts + cc + 1] - rgb[1][rr * ts + cc + 3]));

                                //store in rgb array the interpolated G value at R/B grid points using directional weighted average
                                rgb[1][indx] = (wtu * rgb[1][indx - v1] + wtd * rgb[1][indx + v1] + wtl * rgb[1][indx - 1] + wtr * rgb[1][indx + 1]) / (wtu + wtd + wtl + wtr);
                            }
-
                        }
-
+                    }
+                    if (!autoCA) {
                        float hfrac = -((float)(hblock - 0.5) / (hblsz - 2) - 0.5);
                        float vfrac = -((float)(vblock - 0.5) / (vblsz - 2) - 0.5) * height / width;
                        lblockshifts[0][0] = 2 * vfrac * cared;
@@ -935,7 +967,6 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
                        for (int i = 0; i < polyord; i++) {
                            double powHblock = powVblock;
                            for (int j = 0; j < polyord; j++) {
-                                //printf("i= %d j= %d polycoeff= %f \n",i,j,fitparams[0][0][polyord*i+j]);
                                lblockshifts[0][0] += powHblock * fitparams[0][0][polyord * i + j];
                                lblockshifts[0][1] += powHblock * fitparams[0][1][polyord * i + j];
                                lblockshifts[1][0] += powHblock * fitparams[1][0][polyord * i + j];
@@ -1153,14 +1184,28 @@ void RawImageSource::CA_correct_RT(const bool autoCA, const double cared, const
        }

        // clean up
-        free(buffer);
+        free(bufferThr);
    }

-    free(Gtmp);
-    free(blockwt);
-    free(RawDataTmp);
+    if(autoCA && fitParamsTransfer && fitParamsOut) {
+        // store calculated parameters
+        int index = 0;
+        for(int c = 0; c < 2; ++c) {
+            for(int d = 0; d < 2; ++d) {
+                for(int e = 0; e < 16; ++e) {
+                    fitParamsTransfer[index++] = fitparams[c][d][e];
+                }
+            }
+        }
+    }
+
+    if(freeBuffer) {
+        free(buffer);
+        buffer = nullptr;
+    }

    if(plistener) {
        plistener->setProgress(1.0);
    }
+    return buffer;
 }
--- a/rtengine/pixelshift.cc
+++ b/rtengine/pixelshift.cc
@@ -26,7 +26,7 @@
 #include "procparams.h"
 #include "gauss.h"
 #include "median.h"
-#define BENCHMARK
+//#define BENCHMARK
 #include "StopWatch.h"
 namespace
 {
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@@ -39,7 +39,6 @@
 #include <omp.h>
 #endif
 #include "opthelper.h"
-#include "StopWatch.h"
 #define clipretinex( val, minv, maxv )    (( val = (val < minv ? minv : val ) ) > maxv ? maxv : val )
 #undef CLIPD
 #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f)
@@ -2015,11 +2014,14 @@ void RawImageSource::preprocess  (const RAWParams &raw, const LensProfParams &le
            plistener->setProgress (0.0);
        }
        if(numFrames == 4) {
-            for(int i=0; i<4; ++i) {
-                CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, *rawDataFrames[i]);
+            double fitParams[64];
+            float *buffer = CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, *rawDataFrames[0], fitParams, false, true, nullptr, false);
+            for(int i = 1; i < 3; ++i) {
+                CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, *rawDataFrames[i], fitParams, true, false, buffer, false);
            }
+            CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, *rawDataFrames[3], fitParams, true, false, buffer, true);
        } else {
-            CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, rawData);
+            CA_correct_RT(raw.ca_autocorrect, raw.cared, raw.cablue, 8.0, rawData, nullptr, false, false, nullptr, true);
        }
    }

--- a/rtengine/rawimagesource.h
+++ b/rtengine/rawimagesource.h
@@ -245,7 +245,7 @@ protected:
    inline  void interpolate_row_rb     (float* ar, float* ab, float* pg, float* cg, float* ng, int i);
    inline  void interpolate_row_rb_mul_pp (float* ar, float* ab, float* pg, float* cg, float* ng, int i, float r_mul, float g_mul, float b_mul, int x1, int width, int skip);

-    void CA_correct_RT  (const bool autoCA, const double cared, const double cablue, const double caautostrength, array2D<float> &rawData);
+    float* CA_correct_RT  (const bool autoCA, const double cared, const double cablue, const double caautostrength, array2D<float> &rawData, double *fitParamsTransfer, bool fitParamsIn, bool fitParamsOut, float * buffer, bool freeBuffer);
    void ddct8x8s(int isgn, float a[8][8]);
    void processRawWhitepoint (float expos, float preser, array2D<float> &rawData);  // exposure before interpolation