Speedup for histogram panel
This commit is contained in:
parent
2dd2f5ca17
commit
aa5072fa0a
@ -26,17 +26,20 @@ typedef __m128i vint2;
|
|||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
|
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
||||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
||||||
#define LVF(x) _mm_loadu_ps((float*)&x)
|
#define LVF(x) _mm_loadu_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
|
#define LVI(x) _mm_loadu_si128((__m128i*)&x)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#define LVF(x) _mm_load_ps((float*)&x)
|
#define LVF(x) _mm_load_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
|
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64__) && defined(__AVX__)
|
#if defined(__x86_64__) && defined(__AVX__)
|
||||||
|
@ -25,8 +25,7 @@
|
|||||||
#include "rtimage.h"
|
#include "rtimage.h"
|
||||||
#include "../rtengine/improccoordinator.h"
|
#include "../rtengine/improccoordinator.h"
|
||||||
#include "../rtengine/color.h"
|
#include "../rtengine/color.h"
|
||||||
|
#include "../rtengine/opthelper.h"
|
||||||
|
|
||||||
using namespace rtengine;
|
using namespace rtengine;
|
||||||
|
|
||||||
extern Glib::ustring argv0;
|
extern Glib::ustring argv0;
|
||||||
@ -852,7 +851,7 @@ void HistogramArea::update (LUTu &histRed, LUTu &histGreen, LUTu &histBlue, LUTu
|
|||||||
g_idle_add (histupdateUI, haih);
|
g_idle_add (histupdateUI, haih);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HistogramArea::renderHistogram ()
|
SSEFUNCTION void HistogramArea::renderHistogram ()
|
||||||
{
|
{
|
||||||
|
|
||||||
if (!is_realized ()) {
|
if (!is_realized ()) {
|
||||||
@ -878,7 +877,7 @@ void HistogramArea::renderHistogram ()
|
|||||||
|
|
||||||
// make double copies of LUT, one for faster access, another one to scale down the raw histos
|
// make double copies of LUT, one for faster access, another one to scale down the raw histos
|
||||||
LUTu rhchanged(256), ghchanged(256), bhchanged(256);
|
LUTu rhchanged(256), ghchanged(256), bhchanged(256);
|
||||||
unsigned int lhisttemp[256], chisttemp[256], rhtemp[256], ghtemp[256], bhtemp[256];
|
unsigned int lhisttemp[256] ALIGNED16 {0}, chisttemp[256] ALIGNED16 {0}, rhtemp[256] ALIGNED16 {0}, ghtemp[256] ALIGNED16 {0}, bhtemp[256] ALIGNED16 {0};
|
||||||
const int scale = (rawMode ? 8 : 1);
|
const int scale = (rawMode ? 8 : 1);
|
||||||
|
|
||||||
for(int i = 0; i < 256; i++) {
|
for(int i = 0; i < 256; i++) {
|
||||||
@ -937,31 +936,48 @@ void HistogramArea::renderHistogram ()
|
|||||||
if (!fullMode) {
|
if (!fullMode) {
|
||||||
int area = 0;
|
int area = 0;
|
||||||
|
|
||||||
if(!rawMode)
|
#ifdef __SSE2__
|
||||||
for (int i = 0; i < fullhistheight; i++) {
|
vint onev = _mm_set1_epi32(1);
|
||||||
for (int j = 0; j < 256; j++)
|
vint iv = (vint)ZEROV;
|
||||||
if ((needLuma && lhisttemp[j] > i) || (needChroma && chisttemp[j] > i) || (needRed && rhtemp[j] > i) || (needGreen && ghtemp[j] > i) || (needBlue && bhtemp[j] > i)) {
|
#endif
|
||||||
area++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((double)area / (256 * (i + 1)) < 0.3) {
|
for (int i = 0; i < fullhistheight; i++) {
|
||||||
realhistheight = i;
|
#ifdef __SSE2__
|
||||||
break;
|
vint areatempv = (vint)ZEROV;
|
||||||
}
|
|
||||||
}
|
for (int j = 0; j < 256; j += 4) {
|
||||||
else
|
vmask mask1v = _mm_cmpgt_epi32(LVI(lhisttemp[j]), iv);
|
||||||
for (int i = 0; i < fullhistheight; i++) {
|
vmask mask2v = _mm_cmpgt_epi32(LVI(rhtemp[j]), iv);
|
||||||
for (int j = 0; j < 256; j++)
|
vmask mask3v = _mm_cmpgt_epi32(LVI(ghtemp[j]), iv);
|
||||||
if ((needRed && rhtemp[j] > i) || (needGreen && ghtemp[j] > i) || (needBlue && bhtemp[j] > i)) {
|
vmask mask4v = _mm_cmpgt_epi32(LVI(bhtemp[j]), iv);
|
||||||
area++;
|
mask1v = _mm_or_si128(mask1v, mask2v);
|
||||||
}
|
mask3v = _mm_or_si128(mask3v, mask4v);
|
||||||
|
mask2v = _mm_cmpgt_epi32(LVI(chisttemp[j]), iv);
|
||||||
|
mask1v = _mm_or_si128(mask1v, mask3v);
|
||||||
|
mask1v = _mm_or_si128(mask1v, mask2v);
|
||||||
|
areatempv = _mm_add_epi32(areatempv, _mm_and_si128(mask1v, onev));
|
||||||
|
|
||||||
if ((double)area / (256 * (i + 1)) < 0.3) {
|
|
||||||
realhistheight = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
areatempv = _mm_add_epi32(areatempv, (vint)_mm_movehl_ps((vfloat)areatempv, (vfloat)areatempv));
|
||||||
|
areatempv = _mm_add_epi32(areatempv, _mm_shuffle_epi32(areatempv, 1));
|
||||||
|
area += _mm_cvtsi128_si32(areatempv);
|
||||||
|
iv = _mm_add_epi32(iv, onev);
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
for (int j = 0; j < 256; j++)
|
||||||
|
if (lhisttemp[j] > i || rhtemp[j] > i || ghtemp[j] > i || bhtemp[j] > i || chisttemp[j] > i) {
|
||||||
|
area++;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((double)area / (256 * (i + 1)) < 0.3) {
|
||||||
|
realhistheight = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (realhistheight < winh - 2) {
|
if (realhistheight < winh - 2) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user