Reduce peak memory usage of queue processing, Issue 2527

This commit is contained in:
Ingo
2014-10-20 16:46:41 +02:00
parent ee8ef7813e
commit 2a660e1965
11 changed files with 159 additions and 179 deletions

View File

@@ -23,19 +23,13 @@
#include "improcfun.h"
#include "cieimage.h"
#include "sleef.c"
#ifdef __SSE2__
#include "sleefsseavx.c"
#endif
#include "opthelper.h"
using namespace std;
namespace rtengine {
#if defined( __SSE2__ ) && defined( WIN32 )
__attribute__((force_align_arg_pointer)) void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
#else
void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
#endif
SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
{
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -240,13 +234,8 @@ void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
}
#if defined( __SSE2__ ) && defined( WIN32 )
__attribute__((force_align_arg_pointer)) void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh)
#else
void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh)
#endif
SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3])
{
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// impulse noise removal
// local variables
@@ -256,67 +245,16 @@ void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh)
float piid=3.14159265f/180.f;
// buffer for the lowpass image
float ** lpf = new float *[height];
float ** lpf = buffers[0];
// buffer for the highpass image
float ** impish = new float *[height];
for (int i=0; i<height; i++) {
lpf[i] = new float [width];
//memset (lpf[i], 0, width*sizeof(float));
impish[i] = new float [width];
//memset (impish[i], 0, width*sizeof(unsigned short));
}
float ** impish = buffers[1];
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// modified bilateral filter for lowpass image, omitting input pixel; or Gaussian blur
const float eps = 1.0f;
float** sraa;
sraa = new float*[height];
for (int i=0; i<height; i++)
sraa[i] = new float[width];
float** srbb;
srbb = new float*[height];
for (int i=0; i<height; i++)
srbb[i] = new float[width];
#ifdef _OPENMP
#pragma omp parallel
#endif
{
int j;
float2 sincosval;
#ifdef __SSE2__
vfloat2 sincosvalv;
__m128 piidv = _mm_set1_ps( piid );
__m128 tempv;
#endif
#ifdef _OPENMP
#pragma omp for
#endif
for (int i=0; i<height; i++) {
#ifdef __SSE2__
for (j=0; j<width-3; j+=4) {
sincosvalv = xsincosf(piidv*LVFU(ncie->h_p[i][j]));
tempv = LVFU(ncie->C_p[i][j]);
_mm_storeu_ps(&sraa[i][j], tempv * sincosvalv.y);
_mm_storeu_ps(&srbb[i][j], tempv * sincosvalv.x);
}
for (; j<width; j++) {
sincosval = xsincosf(piid*ncie->h_p[i][j]);
sraa[i][j]=ncie->C_p[i][j]*sincosval.y;
srbb[i][j]=ncie->C_p[i][j]*sincosval.x;
}
#else
for (j=0; j<width; j++) {
sincosval = xsincosf(piid*ncie->h_p[i][j]);
sraa[i][j]=ncie->C_p[i][j]*sincosval.y;
srbb[i][j]=ncie->C_p[i][j]*sincosval.x;
}
#endif
}
}
//The cleaning algorithm starts here
@@ -404,6 +342,48 @@ void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh)
}
//now impulsive values have been identified
// Fill two per-pixel planes from the C_p / h_p channels of ncie:
//   sraa[i][j] = C_p[i][j] * sincos(piid * h_p[i][j]).y
//   srbb[i][j] = C_p[i][j] * sincos(piid * h_p[i][j]).x
// piid is defined above as 3.14159265f/180.f, so h_p is presumably an angle in degrees -- TODO confirm.
// NOTE(review): with sleef's xsincosf, .x is presumably the sine and .y the cosine -- confirm against sleef.c.
// Both planes use caller-provided storage from `buffers`; nothing is allocated in this section.
const float eps = 1.0f;
float** sraa = buffers[0]; // we can reuse buffers[0] because lpf is not needed anymore at this point
float** srbb = buffers[2];
#ifdef _OPENMP
#pragma omp parallel
#endif
{
// Per-thread locals (declared inside the parallel region).
// j lives outside the column loops so the scalar tail loop can resume where the SSE loop stopped.
int j;
float2 sincosval;
#ifdef __SSE2__
vfloat2 sincosvalv;
__m128 piidv = _mm_set1_ps( piid ); // piid replicated into all four lanes
__m128 tempv;
#endif
#ifdef _OPENMP
#pragma omp for
#endif
// Rows are distributed across threads; each iteration writes only row i of sraa and srbb.
for (int i=0; i<height; i++) {
#ifdef __SSE2__
// Vector path: four columns per iteration, stored with unaligned stores.
// NOTE(review): LVFU is presumably an unaligned 4-float load -- confirm in the SSE helper header.
for (j=0; j<width-3; j+=4) {
sincosvalv = xsincosf(piidv*LVFU(ncie->h_p[i][j]));
tempv = LVFU(ncie->C_p[i][j]);
_mm_storeu_ps(&sraa[i][j], tempv * sincosvalv.y);
_mm_storeu_ps(&srbb[i][j], tempv * sincosvalv.x);
}
// Scalar tail: handles the remaining (at most three) columns not covered by the vector loop.
for (; j<width; j++) {
sincosval = xsincosf(piid*ncie->h_p[i][j]);
sraa[i][j]=ncie->C_p[i][j]*sincosval.y;
srbb[i][j]=ncie->C_p[i][j]*sincosval.x;
}
#else
// Scalar-only path used when SSE2 is not available at compile time.
for (j=0; j<width; j++) {
sincosval = xsincosf(piid*ncie->h_p[i][j]);
sraa[i][j]=ncie->C_p[i][j]*sincosval.y;
srbb[i][j]=ncie->C_p[i][j]*sincosval.x;
}
#endif
}
}
// Issue 1671:
// often, noise isn't evenly distributed, e.g. only a few noisy pixels in the bright sky, but many in the dark foreground,
@@ -524,19 +504,6 @@ void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh)
}
}
for (int i=0; i<height; i++) {
delete [] lpf[i];
delete [] impish[i];
}
delete [] lpf;
delete [] impish;
for (int i=0; i<height; i++)
delete [] sraa[i];
delete [] sraa;
for (int i=0; i<height; i++)
delete [] srbb[i];
delete [] srbb;
}