Merge branch 'master' into gtk3

2016-04-23 03:18:40 +02:00
parent 3e2c710b45 50e6dfc95a
commit 7aa61e4459
10 changed files with 50 additions and 50 deletions
--- a/rtengine/CA_correct_RT.cc
+++ b/rtengine/CA_correct_RT.cc
@@ -197,7 +197,7 @@ void RawImageSource::CA_correct_RT(const double cared, const double cablue, cons

        // assign working space
        constexpr int buffersize = 3 * sizeof(float) * ts * ts + 6 * sizeof(float) * ts * tsh + 8 * 64 + 63;
-        char *buffer = (char *) calloc(buffersize, 1);
+        char *buffer = (char *) malloc(buffersize);
        char *data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);

        // shift the beginning of all arrays but the first by 64 bytes to avoid cache miss conflicts on CPUs which have <=4-way associative L1-Cache
@@ -231,6 +231,7 @@ void RawImageSource::CA_correct_RT(const double cared, const double cablue, cons
            #pragma omp for collapse(2) schedule(dynamic) nowait
            for (int top = -border ; top < height; top += ts - border2)
                for (int left = -border; left < width; left += ts - border2) {
+                    memset(buffer, 0, buffersize);
                    const int vblock = ((top + border) / (ts - border2)) + 1;
                    const int hblock = ((left + border) / (ts - border2)) + 1;
                    const int bottom = min(top + ts, height + border);
@@ -730,6 +731,7 @@ void RawImageSource::CA_correct_RT(const double cared, const double cablue, cons

            for (int top = -border; top < height; top += ts - border2)
                for (int left = -border; left < width; left += ts - border2) {
+                    memset(buffer, 0, buffersize);
                    float lblockshifts[2][2];
                    const int vblock = ((top + border) / (ts - border2)) + 1;
                    const int hblock = ((left + border) / (ts - border2)) + 1;
--- a/rtengine/LUT.h
+++ b/rtengine/LUT.h
@@ -92,9 +92,9 @@ protected:
 private:
    unsigned int owner;
 #if defined( __SSE2__ ) && defined( __x86_64__ )
-    vfloat maxsv __attribute__ ((aligned (16)));
-    vfloat sizev __attribute__ ((aligned (16)));
-    vint sizeiv __attribute__ ((aligned (16)));
+    vfloat maxsv ALIGNED16;
+    vfloat sizev ALIGNED16;
+    vint sizeiv ALIGNED16;
 #endif
 public:
    /// convenience flag! If one doesn't want to delete the buffer but want to flag it to be recomputed...
--- a/rtengine/dirpyr_equalizer.cc
+++ b/rtengine/dirpyr_equalizer.cc
@@ -418,7 +418,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da
            __m128 thousandv = _mm_set1_ps( 1000.0f );
            __m128 dirwtv, valv, normv, dftemp1v, dftemp2v;
 //  multiplied each value of domkerv by 1000 to avoid multiplication by 1000 inside the loop
-            float domkerv[5][5][4] __attribute__ ((aligned (16))) = {{{1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}}};
+            float domkerv[5][5][4] ALIGNED16 = {{{1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {2000, 2000, 2000, 2000}, {1000, 1000, 1000, 1000}}, {{1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}, {1000, 1000, 1000, 1000}}};
 #endif // __SSE2__

            int j;
@@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da

                        for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
                            dftemp2v = LVFU(data_fine[inbr][jnbr]);
-                            dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
+                            dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
                            valv += dirwtv * dftemp2v;
                            normv += dirwtv;
                        }
--- a/rtengine/helpersse2.h
+++ b/rtengine/helpersse2.h
@@ -22,18 +22,18 @@ typedef __m128i vint2;
 //
 #ifdef __GNUC__
 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
-#define LVF(x) _mm_load_ps(&x)
+#define LVF(x) _mm_load_ps((float*)&x)
 #define LVFU(x) _mm_loadu_ps(&x)
 #define STVF(x,y) _mm_store_ps(&x,y)
 #define STVFU(x,y) _mm_storeu_ps(&x,y)
 #else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
-#define LVF(x) _mm_loadu_ps(&x)
+#define LVF(x) _mm_loadu_ps((float*)&x)
 #define LVFU(x) _mm_loadu_ps(&x)
 #define STVF(x,y) _mm_storeu_ps(&x,y)
 #define STVFU(x,y) _mm_storeu_ps(&x,y)
 #endif
 #else
-#define LVF(x) _mm_load_ps(&x)
+#define LVF(x) _mm_load_ps((float*)&x)
 #define LVFU(x) _mm_loadu_ps(&x)
 #define STVF(x,y) _mm_store_ps(&x,y)
 #define STVFU(x,y) _mm_storeu_ps(&x,y)
--- a/rtengine/shmap.cc
+++ b/rtengine/shmap.cc
@@ -475,7 +475,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
        {
 #if defined( __SSE2__ ) && defined( __x86_64__ )
            __m128 dirwtv, valv, normv, dftemp1v, dftemp2v, fgg;
-            float domkerv[5][5][4] __attribute__ ((aligned (16))) = {{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}};
+            float domkerv[5][5][4] ALIGNED16 = {{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}};

 #endif // __SSE2__
            int j;
@@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i

                        for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
                            dftemp2v = LVFU(data_fine[inbr][jnbr]);
-                            dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
+                            dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
                            valv += dirwtv * dftemp2v;
                            normv += dirwtv;
                        }