fix broken non SSE build and add SSE code for one loop in wavcont()

2020-07-04 11:35:37 +02:00 · 2020-07-04 11:35:37 +02:00 · 8728aee0ce
commit 8728aee0ce
parent af4b17ac84
2 changed files with 30 additions and 16 deletions
--- a/rtengine/LUT.h
+++ b/rtengine/LUT.h
@ -99,8 +99,8 @@ protected:
    unsigned int size;
    unsigned int upperBound;  // always equals size-1, parameter created for performance reason
 private:
-#ifdef __SSE2__
    unsigned int owner;
+#ifdef __SSE2__
    alignas(16) vfloat maxsv;
    alignas(16) vfloat sizev;
    alignas(16) vint sizeiv;
--- a/rtengine/iplocallab.cc
+++ b/rtengine/iplocallab.cc
@ -7184,16 +7184,6 @@ void ImProcFunctions::wavcont(const struct local_params& lp, float ** tmp, wavel
                float lutFactor;
                const float inVals[] = {0.05f, 0.2f, 0.7f, 1.f, 1.f, 0.8f, 0.65f, 0.5f, 0.4f, 0.25f, 0.1f};
                const auto meaLut = buildMeaLut(inVals, mea, lutFactor);
-
-                float klev = (loccomprewavCurve[level * 55.5f] - 0.75f);
-                if (klev < 0.f) {
-                    klev *= 2.6666f;//compression increase contraste
-                } else {
-                    klev *= 4.f;//dilatation reduce contraste - detailattenuator
-                }
-                const float compression = expf(-klev);
-                const float detailattenuator = std::max(klev, 0.f);
-
                const auto wav_L = wdspot.level_coeffs(level)[dir];

 #ifdef _OPENMP
@ -7206,13 +7196,36 @@ void ImProcFunctions::wavcont(const struct local_params& lp, float ** tmp, wavel
                    }
                }

+                float klev = (loccomprewavCurve[level * 55.5f] - 0.75f);
+                if (klev < 0.f) {
+                    klev *= 2.6666f;//compression increase contraste
+                } else {
+                    klev *= 4.f;//dilatation reduce contraste - detailattenuator
+                }
+                const float compression = expf(-klev);
+                const float detailattenuator = std::max(klev, 0.f);
+
                Compresslevels(templevel, W_L, H_L, compression, detailattenuator, thres, mean[level], MaxP[level], meanN[level], MaxN[level], madL[level][dir - 1]);
 #ifdef _OPENMP
-                #pragma omp parallel for if (multiThread)
+                #pragma omp parallel if (multiThread)
+#endif
+                {
+#ifdef __SSE2__
+                    const vfloat lutFactorv = F2V(lutFactor);
+#endif
+#ifdef _OPENMP
+                    #pragma omp for
 #endif
                    for (int y = 0; y < H_L; y++) {
-                    for (int x = 0; x < W_L; x++) {
-                        int j = y * W_L + x;
+                        int x = 0;
+                        int j = y * W_L;
+#ifdef __SSE2__
+                        for (; x < W_L - 3; x += 4, j += 4) {
+                            const vfloat valv = LVFU(wav_L[j]);
+                            STVFU(wav_L[j], intp((*meaLut)[vabsf(valv) * lutFactorv], LVFU(templevel[y][x]), valv));
+                        }
+#endif
+                        for (; x < W_L; x++, j++) {
                            wav_L[j] = intp((*meaLut)[std::fabs(wav_L[j]) * lutFactor], templevel[y][x], wav_L[j]);
                        }
                    }
@ -7220,6 +7233,7 @@ void ImProcFunctions::wavcont(const struct local_params& lp, float ** tmp, wavel
            }
        }
    }
+}


 void ImProcFunctions::wavcontrast4(struct local_params& lp, float ** tmp, float ** tmpa, float ** tmpb, float contrast, float radblur, float radlevblur, int bfw, int bfh, int level_bl, int level_hl, int level_br, int level_hr, int sk, int numThreads,