SSE2 code for BadpixelsLab()
This commit is contained in:
parent
63f14dda7e
commit
ad0e05f846
@ -34,15 +34,11 @@
|
|||||||
#include "jaggedarray.h"
|
#include "jaggedarray.h"
|
||||||
#define BENCHMARK
|
#define BENCHMARK
|
||||||
#include "StopWatch.h"
|
#include "StopWatch.h"
|
||||||
#ifdef _OPENMP
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
extern const Settings* settings;
|
|
||||||
|
|
||||||
void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh)
|
void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh)
|
||||||
{
|
{
|
||||||
@ -1243,50 +1239,68 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, in
|
|||||||
float atot = 0.f;
|
float atot = 0.f;
|
||||||
float btot = 0.f;
|
float btot = 0.f;
|
||||||
float norm = 0.f;
|
float norm = 0.f;
|
||||||
float wt;
|
|
||||||
|
|
||||||
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
||||||
for (int j1 = 0; j1 < j + halfwin; j1++) {
|
for (int j1 = 0; j1 < j + halfwin; j1++) {
|
||||||
wt = badpix[i1 * width + j1];
|
float wt = badpix[i1 * width + j1];
|
||||||
atot += wt * src->a[i1][j1];
|
atot += wt * src->a[i1][j1];
|
||||||
btot += wt * src->b[i1][j1];
|
btot += wt * src->b[i1][j1];
|
||||||
norm += wt;
|
norm += wt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(norm > 0.f) {
|
if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) {
|
||||||
const float a = atot / norm;
|
src->a[i][j] = atot / norm;
|
||||||
const float b = btot / norm;
|
src->b[i][j] = btot / norm;
|
||||||
if(SQR(a) + SQR(b) < chrom) {
|
|
||||||
src->a[i][j] = a;
|
|
||||||
src->b[i][j] = b;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(; j < width - halfwin; j++) { // this loop is the hot spot. Maybe worth to vectorize
|
#ifdef __SSE2__
|
||||||
|
vfloat chromv = F2V(chrom);
|
||||||
|
vfloat threshfactorv = F2V(threshfactor);
|
||||||
|
for(; j < width - halfwin - 3; j+=4) {
|
||||||
|
|
||||||
|
vmask selMask = vmaskf_lt(LVFU(badpix[i * width + j]), threshfactorv);
|
||||||
|
if (_mm_movemask_ps((vfloat)selMask)) {
|
||||||
|
vfloat atotv = ZEROV;
|
||||||
|
vfloat btotv = ZEROV;
|
||||||
|
vfloat normv = ZEROV;
|
||||||
|
|
||||||
|
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
||||||
|
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
|
||||||
|
vfloat wtv = LVFU(badpix[i1 * width + j1]);
|
||||||
|
atotv += wtv * LVFU(src->a[i1][j1]);
|
||||||
|
btotv += wtv * LVFU(src->b[i1][j1]);
|
||||||
|
normv += wtv;
|
||||||
|
}
|
||||||
|
selMask = vandm(selMask, vmaskf_lt(SQRV(atotv) + SQR(btotv), chromv * SQRV(normv)));
|
||||||
|
if(_mm_movemask_ps((vfloat)selMask)) {
|
||||||
|
vfloat aOrig = LVFU(src->a[i][j]);
|
||||||
|
vfloat bOrig = LVFU(src->b[i][j]);
|
||||||
|
STVFU(src->a[i][j], vself(selMask, atotv / normv, aOrig));
|
||||||
|
STVFU(src->b[i][j], vself(selMask, btotv / normv, bOrig));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for(; j < width - halfwin; j++) {
|
||||||
|
|
||||||
if (badpix[i * width + j] < threshfactor) {
|
if (badpix[i * width + j] < threshfactor) {
|
||||||
float atot = 0.f;
|
float atot = 0.f;
|
||||||
float btot = 0.f;
|
float btot = 0.f;
|
||||||
float norm = 0.f;
|
float norm = 0.f;
|
||||||
float wt;
|
|
||||||
|
|
||||||
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
||||||
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
|
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
|
||||||
wt = badpix[i1 * width + j1];
|
float wt = badpix[i1 * width + j1];
|
||||||
atot += wt * src->a[i1][j1];
|
atot += wt * src->a[i1][j1];
|
||||||
btot += wt * src->b[i1][j1];
|
btot += wt * src->b[i1][j1];
|
||||||
norm += wt;
|
norm += wt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(norm > 0.f) {
|
if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) {
|
||||||
const float a = atot / norm;
|
src->a[i][j] = atot / norm;
|
||||||
const float b = btot / norm;
|
src->b[i][j] = btot / norm;
|
||||||
if(SQR(a) + SQR(b) < chrom) {
|
|
||||||
src->a[i][j] = a;
|
|
||||||
src->b[i][j] = b;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1297,23 +1311,18 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, in
|
|||||||
float atot = 0.f;
|
float atot = 0.f;
|
||||||
float btot = 0.f;
|
float btot = 0.f;
|
||||||
float norm = 0.f;
|
float norm = 0.f;
|
||||||
float wt;
|
|
||||||
|
|
||||||
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
|
||||||
for (int j1 = j - halfwin + 1; j1 < width; j1++) {
|
for (int j1 = j - halfwin + 1; j1 < width; j1++) {
|
||||||
wt = badpix[i1 * width + j1];
|
float wt = badpix[i1 * width + j1];
|
||||||
atot += wt * src->a[i1][j1];
|
atot += wt * src->a[i1][j1];
|
||||||
btot += wt * src->b[i1][j1];
|
btot += wt * src->b[i1][j1];
|
||||||
norm += wt;
|
norm += wt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(norm > 0.f) {
|
if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) {
|
||||||
const float a = atot / norm;
|
src->a[i][j] = atot / norm;
|
||||||
const float b = btot / norm;
|
src->b[i][j] = btot / norm;
|
||||||
if(SQR(a) + SQR(b) < chrom) {
|
|
||||||
src->a[i][j] = a;
|
|
||||||
src->b[i][j] = b;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -196,6 +196,7 @@ void DirPyrEqualizer::read (const ProcParams* pp, const ParamsEdited* pedited)
|
|||||||
*/
|
*/
|
||||||
gamutlabConn.block (true);
|
gamutlabConn.block (true);
|
||||||
gamutlab->set_active (pp->dirpyrequalizer.gamutlab);
|
gamutlab->set_active (pp->dirpyrequalizer.gamutlab);
|
||||||
|
gamutlab->set_sensitive (pp->dirpyrequalizer.skinprotect != 0);
|
||||||
gamutlabConn.block (false);
|
gamutlabConn.block (false);
|
||||||
lastgamutlab = pp->dirpyrequalizer.gamutlab;
|
lastgamutlab = pp->dirpyrequalizer.gamutlab;
|
||||||
|
|
||||||
@ -339,6 +340,7 @@ void DirPyrEqualizer::adjusterChanged (Adjuster* a, double newval)
|
|||||||
Glib::ustring::format(std::fixed, std::setprecision(2), threshold->getValue()))
|
Glib::ustring::format(std::fixed, std::setprecision(2), threshold->getValue()))
|
||||||
);
|
);
|
||||||
} else if (a == skinprotect) {
|
} else if (a == skinprotect) {
|
||||||
|
gamutlab->set_sensitive (skinprotect->getValue() != 0);
|
||||||
listener->panelChanged (EvDirPyrEqualizerSkin,
|
listener->panelChanged (EvDirPyrEqualizerSkin,
|
||||||
Glib::ustring::compose("%1",
|
Glib::ustring::compose("%1",
|
||||||
Glib::ustring::format(std::fixed, std::setprecision(2), skinprotect->getValue()))
|
Glib::ustring::format(std::fixed, std::setprecision(2), skinprotect->getValue()))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user