Speedup for histogram panel
This commit is contained in:
parent
2dd2f5ca17
commit
aa5072fa0a
@ -26,17 +26,20 @@ typedef __m128i vint2;
|
||||
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
||||
// SSE load/store wrappers for the fallback branch selected by the #else above.
// Every macro here expands to an *unaligned* intrinsic (_mm_loadu_ps,
// _mm_storeu_ps, _mm_loadu_si128), so in this branch LVF/STVF/LVI behave like
// their "U" counterparts.
// Each macro takes the address of its argument (&x), so x must be an lvalue
// such as an array element, e.g. LVI(buf[j]).
// LVF: load 4 packed floats starting at x (address is cast to float*).
#define LVF(x) _mm_loadu_ps((float*)&x)
|
||||
// LVFU: load 4 packed floats starting at x; no cast, so x must already be a float.
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
// STVF: store the 4 packed floats in y to memory starting at x.
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
||||
// STVFU: identical expansion to STVF in this branch.
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
// LVI: load 128 bits of integer data starting at x (address is cast to __m128i*).
#define LVI(x) _mm_loadu_si128((__m128i*)&x)
|
||||
#endif
|
||||
#else
|
||||
// SSE load/store wrappers for this #else branch (the controlling #if is not
// visible in this excerpt).
// LVF/STVF/LVI expand to the *aligned* intrinsics (_mm_load_ps, _mm_store_ps,
// _mm_load_si128), which require the address of x to be 16-byte aligned;
// LVFU/STVFU expand to the unaligned variants and carry no such requirement.
// Each macro takes the address of its argument (&x), so x must be an lvalue
// such as an array element, e.g. LVI(buf[j]).
// LVF: aligned load of 4 packed floats starting at x (address is cast to float*).
#define LVF(x) _mm_load_ps((float*)&x)
|
||||
// LVFU: unaligned load of 4 packed floats starting at x; no cast, so x must already be a float.
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
// STVF: aligned store of the 4 packed floats in y to memory starting at x.
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||
// STVFU: unaligned store of the 4 packed floats in y to memory starting at x.
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
// LVI: aligned load of 128 bits of integer data starting at x (address is cast to __m128i*).
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && defined(__AVX__)
|
||||
|
@ -25,8 +25,7 @@
|
||||
#include "rtimage.h"
|
||||
#include "../rtengine/improccoordinator.h"
|
||||
#include "../rtengine/color.h"
|
||||
|
||||
|
||||
#include "../rtengine/opthelper.h"
|
||||
using namespace rtengine;
|
||||
|
||||
extern Glib::ustring argv0;
|
||||
@ -852,7 +851,7 @@ void HistogramArea::update (LUTu &histRed, LUTu &histGreen, LUTu &histBlue, LUTu
|
||||
g_idle_add (histupdateUI, haih);
|
||||
}
|
||||
|
||||
void HistogramArea::renderHistogram ()
|
||||
SSEFUNCTION void HistogramArea::renderHistogram ()
|
||||
{
|
||||
|
||||
if (!is_realized ()) {
|
||||
@ -878,7 +877,7 @@ void HistogramArea::renderHistogram ()
|
||||
|
||||
// make double copies of LUT, one for faster access, another one to scale down the raw histos
|
||||
LUTu rhchanged(256), ghchanged(256), bhchanged(256);
|
||||
unsigned int lhisttemp[256], chisttemp[256], rhtemp[256], ghtemp[256], bhtemp[256];
|
||||
unsigned int lhisttemp[256] ALIGNED16 {0}, chisttemp[256] ALIGNED16 {0}, rhtemp[256] ALIGNED16 {0}, ghtemp[256] ALIGNED16 {0}, bhtemp[256] ALIGNED16 {0};
|
||||
const int scale = (rawMode ? 8 : 1);
|
||||
|
||||
for(int i = 0; i < 256; i++) {
|
||||
@ -937,31 +936,48 @@ void HistogramArea::renderHistogram ()
|
||||
if (!fullMode) {
|
||||
int area = 0;
|
||||
|
||||
if(!rawMode)
|
||||
for (int i = 0; i < fullhistheight; i++) {
|
||||
for (int j = 0; j < 256; j++)
|
||||
if ((needLuma && lhisttemp[j] > i) || (needChroma && chisttemp[j] > i) || (needRed && rhtemp[j] > i) || (needGreen && ghtemp[j] > i) || (needBlue && bhtemp[j] > i)) {
|
||||
area++;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
vint onev = _mm_set1_epi32(1);
|
||||
vint iv = (vint)ZEROV;
|
||||
#endif
|
||||
|
||||
if ((double)area / (256 * (i + 1)) < 0.3) {
|
||||
realhistheight = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < fullhistheight; i++) {
|
||||
for (int j = 0; j < 256; j++)
|
||||
if ((needRed && rhtemp[j] > i) || (needGreen && ghtemp[j] > i) || (needBlue && bhtemp[j] > i)) {
|
||||
area++;
|
||||
}
|
||||
for (int i = 0; i < fullhistheight; i++) {
|
||||
#ifdef __SSE2__
|
||||
vint areatempv = (vint)ZEROV;
|
||||
|
||||
for (int j = 0; j < 256; j += 4) {
|
||||
vmask mask1v = _mm_cmpgt_epi32(LVI(lhisttemp[j]), iv);
|
||||
vmask mask2v = _mm_cmpgt_epi32(LVI(rhtemp[j]), iv);
|
||||
vmask mask3v = _mm_cmpgt_epi32(LVI(ghtemp[j]), iv);
|
||||
vmask mask4v = _mm_cmpgt_epi32(LVI(bhtemp[j]), iv);
|
||||
mask1v = _mm_or_si128(mask1v, mask2v);
|
||||
mask3v = _mm_or_si128(mask3v, mask4v);
|
||||
mask2v = _mm_cmpgt_epi32(LVI(chisttemp[j]), iv);
|
||||
mask1v = _mm_or_si128(mask1v, mask3v);
|
||||
mask1v = _mm_or_si128(mask1v, mask2v);
|
||||
areatempv = _mm_add_epi32(areatempv, _mm_and_si128(mask1v, onev));
|
||||
|
||||
if ((double)area / (256 * (i + 1)) < 0.3) {
|
||||
realhistheight = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
areatempv = _mm_add_epi32(areatempv, (vint)_mm_movehl_ps((vfloat)areatempv, (vfloat)areatempv));
|
||||
areatempv = _mm_add_epi32(areatempv, _mm_shuffle_epi32(areatempv, 1));
|
||||
area += _mm_cvtsi128_si32(areatempv);
|
||||
iv = _mm_add_epi32(iv, onev);
|
||||
|
||||
#else
|
||||
|
||||
for (int j = 0; j < 256; j++)
|
||||
if (lhisttemp[j] > i || rhtemp[j] > i || ghtemp[j] > i || bhtemp[j] > i || chisttemp[j] > i) {
|
||||
area++;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if ((double)area / (256 * (i + 1)) < 0.3) {
|
||||
realhistheight = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (realhistheight < winh - 2) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user