Simpler interface for Gaussian blur; speedups for double-precision Gaussian blur and for the Retinex transmission curve

This commit is contained in:
heckflosse
2015-09-16 00:14:58 +02:00
parent 6eba3914f4
commit 5f97c4f15b
8 changed files with 132 additions and 431 deletions

View File

@@ -26,15 +26,18 @@ typedef __m128i vint2;
// Packed single-precision load/store helpers for this branch of the conditional.
// LVF / STVF expand to the aligned intrinsics (_mm_load_ps / _mm_store_ps), which
// require the address of x to be 16-byte aligned; LVFU / STVFU expand to the
// unaligned variants and accept any address.
// x must be an lvalue because its address is taken. Arguments are parenthesized so
// that a compound expression passed as x or y binds as a whole.
#define LVF(x) _mm_load_ps(&(x))
#define LVFU(x) _mm_loadu_ps(&(x))
#define STVF(x,y) _mm_store_ps(&(x), (y))
#define STVFU(x,y) _mm_storeu_ps(&(x), (y))
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3
// Workaround branch (see the gcc 4.7.x note on the #else directly above):
// here the "aligned" names LVF / STVF intentionally expand to the unaligned
// intrinsics as well, so all four macros use _mm_loadu_ps / _mm_storeu_ps.
// x must be an lvalue because its address is taken. Arguments are parenthesized so
// that a compound expression passed as x or y binds as a whole.
#define LVF(x) _mm_loadu_ps(&(x))
#define LVFU(x) _mm_loadu_ps(&(x))
#define STVF(x,y) _mm_storeu_ps(&(x), (y))
#define STVFU(x,y) _mm_storeu_ps(&(x), (y))
#endif
#else
// Fallback branch of the outer conditional (its condition is not shown here).
// LVF / STVF expand to the aligned intrinsics (_mm_load_ps / _mm_store_ps), which
// require the address of x to be 16-byte aligned; LVFU / STVFU expand to the
// unaligned variants and accept any address.
// x must be an lvalue because its address is taken. Arguments are parenthesized so
// that a compound expression passed as x or y binds as a whole.
#define LVF(x) _mm_load_ps(&(x))
#define LVFU(x) _mm_loadu_ps(&(x))
#define STVF(x,y) _mm_store_ps(&(x), (y))
#define STVFU(x,y) _mm_storeu_ps(&(x), (y))
#endif
// Load 8 floats from a and combine a[0],a[2],a[4] and a[6] into a vector of 4 floats