Now that medians are known *before* processing, moved multiplication step inside the same loop as exponentiation for further optimization. Patch kindly provided by @heckflosse ;-)
This commit is contained in:
parent
2bbf8bd864
commit
8fa30d496d
@ -29,6 +29,9 @@
|
|||||||
#include "opthelper.h"
|
#include "opthelper.h"
|
||||||
#include "rt_algo.h"
|
#include "rt_algo.h"
|
||||||
|
|
||||||
|
//#define BENCHMARK
|
||||||
|
#include "StopWatch.h"
|
||||||
|
|
||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -126,6 +129,8 @@ bool RawImageSource::getFilmNegativeExponents (Coord2D spotA, Coord2D spotB, int
|
|||||||
|
|
||||||
void RawImageSource::filmNegativeProcess(const procparams::FilmNegativeParams &params)
|
void RawImageSource::filmNegativeProcess(const procparams::FilmNegativeParams &params)
|
||||||
{
|
{
|
||||||
|
// BENCHFUNMICRO
|
||||||
|
|
||||||
if(!params.enabled)
|
if(!params.enabled)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -199,19 +204,23 @@ void RawImageSource::filmNegativeProcess(const procparams::FilmNegativeParams &p
|
|||||||
// to get the reciprocals. Avoid trouble with zeroes, minimum pixel value is 1.
|
// to get the reciprocals. Avoid trouble with zeroes, minimum pixel value is 1.
|
||||||
const float exps0 = -exps[FC(row, col)];
|
const float exps0 = -exps[FC(row, col)];
|
||||||
const float exps1 = -exps[FC(row, col + 1)];
|
const float exps1 = -exps[FC(row, col + 1)];
|
||||||
|
const float mult0 = mults[FC(row, col)];
|
||||||
|
const float mult1 = mults[FC(row, col + 1)];
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
const vfloat expsv = _mm_setr_ps(exps0, exps1, exps0, exps1);
|
const vfloat expsv = _mm_setr_ps(exps0, exps1, exps0, exps1);
|
||||||
|
const vfloat multsv = _mm_setr_ps(mult0, mult1, mult0, mult1);
|
||||||
const vfloat onev = F2V(1.f);
|
const vfloat onev = F2V(1.f);
|
||||||
|
const vfloat c65535v = F2V(65535.f);
|
||||||
for (; col < W - 3; col+=4) {
|
for (; col < W - 3; col+=4) {
|
||||||
STVFU(rawData[row][col], pow_F(vmaxf(LVFU(rawData[row][col]), onev), expsv));
|
STVFU(rawData[row][col], vminf(multsv * pow_F(vmaxf(LVFU(rawData[row][col]), onev), expsv), c65535v));
|
||||||
}
|
}
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
for (; col < W - 1; col+=2) {
|
for (; col < W - 1; col+=2) {
|
||||||
rawData[row][col] = pow_F(max(rawData[row][col], 1.f), exps0);
|
rawData[row][col] = rtengine::min(mult0 * pow_F(max(rawData[row][col], 1.f), exps0), 65535.f);
|
||||||
rawData[row][col + 1] = pow_F(max(rawData[row][col + 1], 1.f), exps1);
|
rawData[row][col + 1] = rtengine::min(mult1 * pow_F(max(rawData[row][col + 1], 1.f), exps1), 65535.f);
|
||||||
}
|
}
|
||||||
if (col < W) {
|
if (col < W) {
|
||||||
rawData[row][col] = pow_F(max(rawData[row][col], 1.f), exps0);
|
rawData[row][col] = rtengine::min(mult0 * pow_F(max(rawData[row][col], 1.f), exps0), 65535.f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(ri->getSensorType() == ST_FUJI_XTRANS) {
|
} else if(ri->getSensorType() == ST_FUJI_XTRANS) {
|
||||||
@ -223,24 +232,29 @@ void RawImageSource::filmNegativeProcess(const procparams::FilmNegativeParams &p
|
|||||||
// Exponents are expressed as positive in the parameters, so negate them in order
|
// Exponents are expressed as positive in the parameters, so negate them in order
|
||||||
// to get the reciprocals. Avoid trouble with zeroes, minimum pixel value is 1.
|
// to get the reciprocals. Avoid trouble with zeroes, minimum pixel value is 1.
|
||||||
const float expsc[6] = {-exps[ri->XTRANSFC(row, 0)], -exps[ri->XTRANSFC(row, 1)], -exps[ri->XTRANSFC(row, 2)], -exps[ri->XTRANSFC(row, 3)], -exps[ri->XTRANSFC(row, 4)], -exps[ri->XTRANSFC(row, 5)]};
|
const float expsc[6] = {-exps[ri->XTRANSFC(row, 0)], -exps[ri->XTRANSFC(row, 1)], -exps[ri->XTRANSFC(row, 2)], -exps[ri->XTRANSFC(row, 3)], -exps[ri->XTRANSFC(row, 4)], -exps[ri->XTRANSFC(row, 5)]};
|
||||||
|
const float multsc[6] = {mults[ri->XTRANSFC(row, 0)], mults[ri->XTRANSFC(row, 1)], mults[ri->XTRANSFC(row, 2)], mults[ri->XTRANSFC(row, 3)], mults[ri->XTRANSFC(row, 4)], mults[ri->XTRANSFC(row, 5)]};
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
const vfloat expsv0 = _mm_setr_ps(expsc[0], expsc[1], expsc[2], expsc[3]);
|
const vfloat expsv0 = _mm_setr_ps(expsc[0], expsc[1], expsc[2], expsc[3]);
|
||||||
const vfloat expsv1 = _mm_setr_ps(expsc[4], expsc[5], expsc[0], expsc[1]);
|
const vfloat expsv1 = _mm_setr_ps(expsc[4], expsc[5], expsc[0], expsc[1]);
|
||||||
const vfloat expsv2 = _mm_setr_ps(expsc[2], expsc[3], expsc[4], expsc[5]);
|
const vfloat expsv2 = _mm_setr_ps(expsc[2], expsc[3], expsc[4], expsc[5]);
|
||||||
|
const vfloat multsv0 = _mm_setr_ps(multsc[0], multsc[1], multsc[2], multsc[3]);
|
||||||
|
const vfloat multsv1 = _mm_setr_ps(multsc[4], multsc[5], multsc[0], multsc[1]);
|
||||||
|
const vfloat multsv2 = _mm_setr_ps(multsc[2], multsc[3], multsc[4], multsc[5]);
|
||||||
const vfloat onev = F2V(1.f);
|
const vfloat onev = F2V(1.f);
|
||||||
|
const vfloat c65535v = F2V(65535.f);
|
||||||
for (; col < W - 11; col+=12) {
|
for (; col < W - 11; col+=12) {
|
||||||
STVFU(rawData[row][col], pow_F(vmaxf(LVFU(rawData[row][col]), onev), expsv0));
|
STVFU(rawData[row][col], vminf(multsv0 * pow_F(vmaxf(LVFU(rawData[row][col]), onev), expsv0), c65535v));
|
||||||
STVFU(rawData[row][col + 4], pow_F(vmaxf(LVFU(rawData[row][col + 4]), onev), expsv1));
|
STVFU(rawData[row][col + 4], vminf(multsv1 * pow_F(vmaxf(LVFU(rawData[row][col + 4]), onev), expsv1), c65535v));
|
||||||
STVFU(rawData[row][col + 8], pow_F(vmaxf(LVFU(rawData[row][col + 8]), onev), expsv2));
|
STVFU(rawData[row][col + 8], vminf(multsv2 * pow_F(vmaxf(LVFU(rawData[row][col + 8]), onev), expsv2), c65535v));
|
||||||
}
|
}
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
for (; col < W - 5; col+=6) {
|
for (; col < W - 5; col+=6) {
|
||||||
for (int c = 0; c < 6; ++c) {
|
for (int c = 0; c < 6; ++c) {
|
||||||
rawData[row][col + c] = pow_F(max(rawData[row][col + c], 1.f), expsc[c]);
|
rawData[row][col + c] = rtengine::min(multsc[c] * pow_F(max(rawData[row][col + c], 1.f), expsc[c]), 65535.f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int c = 0; col < W; col++, c++) {
|
for (int c = 0; col < W; col++, c++) {
|
||||||
rawData[row][col + c] = pow_F(max(rawData[row][col + c], 1.f), expsc[c]);
|
rawData[row][col + c] = rtengine::min(multsc[c] * pow_F(max(rawData[row][col + c], 1.f), expsc[c]), 65535.f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,41 +264,8 @@ void RawImageSource::filmNegativeProcess(const procparams::FilmNegativeParams &p
|
|||||||
if (settings->verbose)
|
if (settings->verbose)
|
||||||
printf("Pow loop time us: %d\n", t4.etime(t3));
|
printf("Pow loop time us: %d\n", t4.etime(t3));
|
||||||
|
|
||||||
if(ri->getSensorType() == ST_BAYER) {
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp for nowait
|
|
||||||
#endif
|
|
||||||
for (int row = 0; row < H; row ++) {
|
|
||||||
for (int col = 0; col < W; col++) {
|
|
||||||
int c = FC(row, col); // three colors, 0=R, 1=G, 2=B
|
|
||||||
// Apply the multipliers, clamp max output value to 65535
|
|
||||||
float out = rawData[row][col] * mults[c];
|
|
||||||
rawData[row][col] = out > 65535.f ? 65535.f : out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else if(ri->getSensorType() == ST_FUJI_XTRANS) {
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp for nowait
|
|
||||||
#endif
|
|
||||||
for (int row = 0; row < H; row ++) {
|
|
||||||
for (int col = 0; col < W; col++) {
|
|
||||||
int c = ri->XTRANSFC(row, col); // three colors, 0=R, 1=G, 2=B
|
|
||||||
// Apply the multipliers, clamp max output value to 65535
|
|
||||||
float out = rawData[row][col] * mults[c];
|
|
||||||
rawData[row][col] = out > 65535.f ? 65535.f : out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
t5.set();
|
t5.set();
|
||||||
if (settings->verbose)
|
|
||||||
printf("Mult loop time us: %d\n", t5.etime(t4));
|
|
||||||
|
|
||||||
|
|
||||||
PixelsMap bitmapBads(W, H);
|
PixelsMap bitmapBads(W, H);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user