Use AlignedBuffer helper class in rgbProc, use SSE in standard tone curve application.
This commit is contained in:
@@ -21,6 +21,10 @@
|
||||
#include <cstdlib>
|
||||
#include <utility>
|
||||
|
||||
// Rounds `size` up to the nearest multiple of `align` (16 by default) and
// returns the padded size. A `size` that is already a multiple of `align`
// is returned unchanged.
//
// An `align` of 0 is treated as "no alignment requirement": `size` is returned
// as-is instead of dividing by zero.
inline size_t padToAlignment(size_t size, size_t align = 16) {
    if (align == 0) {
        return size;
    }

    // Work from the remainder rather than `size + align - 1`, so an
    // already-aligned `size` close to the maximum of size_t cannot wrap around.
    const size_t remainder = size % align;
    return remainder == 0 ? size : size + (align - remainder);
}
|
||||
|
||||
// Aligned buffer that should be faster
|
||||
template <class T> class AlignedBuffer
|
||||
{
|
||||
|
@@ -800,6 +800,13 @@ class StandardToneCurve : public ToneCurve
|
||||
{
|
||||
public:
|
||||
void Apply(float& r, float& g, float& b) const;
|
||||
|
||||
// Applies the tone curve to `r`, `g`, `b` arrays, starting at `r[start]`
|
||||
// and ending just before `r[end]` (`end` is exclusive; likewise for `g` and `b`). Uses SSE
|
||||
// and requires that `r`, `g`, and `b` pointers have the same alignment.
|
||||
void BatchApply(
|
||||
const size_t start, const size_t end,
|
||||
float *r, float *g, float *b) const;
|
||||
};
|
||||
|
||||
class AdobeToneCurve : public ToneCurve
|
||||
@@ -874,6 +881,52 @@ inline void StandardToneCurve::Apply (float& r, float& g, float& b) const
|
||||
g = lutToneCurve[g];
|
||||
b = lutToneCurve[b];
|
||||
}
|
||||
inline void StandardToneCurve::BatchApply(
|
||||
const size_t start, const size_t end,
|
||||
float *r, float *g, float *b) const {
|
||||
assert (lutToneCurve);
|
||||
|
||||
// All pointers must have the same alignment for SSE usage. In the loop body below,
|
||||
// we will only check `r`, assuming that the same result would hold for `g` and `b`.
|
||||
assert (reinterpret_cast<uintptr_t>(r) % 16 == reinterpret_cast<uintptr_t>(g) % 16);
|
||||
assert (reinterpret_cast<uintptr_t>(g) % 16 == reinterpret_cast<uintptr_t>(b) % 16);
|
||||
|
||||
size_t i = start;
|
||||
while (true) {
|
||||
if (i >= end) {
|
||||
// If we get to the end before getting to an aligned address, just return.
|
||||
// (Or, for non-SSE mode, if we get to the end.)
|
||||
return;
|
||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
|
||||
// Otherwise, we get to the first aligned address; go to the SSE part.
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
r[i] = lutToneCurve[r[i]];
|
||||
g[i] = lutToneCurve[g[i]];
|
||||
b[i] = lutToneCurve[b[i]];
|
||||
i++;
|
||||
}
|
||||
|
||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||
for (; i + 3 < end; i += 4) {
|
||||
__m128i r_val = _mm_cvtps_epi32(LVF(r[i]));
|
||||
__m128i g_val = _mm_cvtps_epi32(LVF(g[i]));
|
||||
__m128i b_val = _mm_cvtps_epi32(LVF(b[i]));
|
||||
STVF(r[i], lutToneCurve[r_val]);
|
||||
STVF(g[i], lutToneCurve[g_val]);
|
||||
STVF(b[i], lutToneCurve[b_val]);
|
||||
}
|
||||
|
||||
// Remainder in non-SSE.
|
||||
for (; i < end; ++i) {
|
||||
r[i] = lutToneCurve[r[i]];
|
||||
g[i] = lutToneCurve[g[i]];
|
||||
b[i] = lutToneCurve[b[i]];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Tone curve according to Adobe's reference implementation
|
||||
// values in 0xffff space
|
||||
|
@@ -23,6 +23,7 @@
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "alignedbuffer.h"
|
||||
#include "rtengine.h"
|
||||
#include "improcfun.h"
|
||||
#include "curves.h"
|
||||
@@ -3409,31 +3410,28 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
{
|
||||
char *buffer;
|
||||
size_t perChannelSizeBytes = padToAlignment(sizeof (float) * TS * TS + 4 * 64);
|
||||
AlignedBuffer<float> buffer(3 * perChannelSizeBytes);
|
||||
char *editIFloatBuffer = nullptr;
|
||||
char *editWhateverBuffer = nullptr;
|
||||
|
||||
buffer = (char *) malloc (3 * sizeof (float) * TS * TS + 20 * 64 + 63);
|
||||
char *data;
|
||||
data = (char*) ( ( uintptr_t (buffer) + uintptr_t (63)) / 64 * 64);
|
||||
|
||||
float *rtemp = (float (*))data;
|
||||
float *gtemp = (float (*)) ((char*)rtemp + sizeof (float) * TS * TS + 4 * 64);
|
||||
float *btemp = (float (*)) ((char*)gtemp + sizeof (float) * TS * TS + 8 * 64);
|
||||
float *rtemp = buffer.data;
|
||||
float *gtemp = &rtemp[perChannelSizeBytes / sizeof(float)];
|
||||
float *btemp = &gtemp[perChannelSizeBytes / sizeof(float)];
|
||||
int istart;
|
||||
int jstart;
|
||||
int tW;
|
||||
int tH;
|
||||
|
||||
// zero out the buffers
|
||||
memset(buffer, 0, 3 * sizeof (float) * TS * TS + 20 * 64 + 63);
|
||||
memset(rtemp, 0, 3 * perChannelSizeBytes);
|
||||
|
||||
// Allocating buffer for the PipetteBuffer
|
||||
float *editIFloatTmpR = nullptr, *editIFloatTmpG = nullptr, *editIFloatTmpB = nullptr, *editWhateverTmp = nullptr;
|
||||
|
||||
if (editImgFloat) {
|
||||
editIFloatBuffer = (char *) malloc (3 * sizeof (float) * TS * TS + 20 * 64 + 63);
|
||||
data = (char*) ( ( uintptr_t (editIFloatBuffer) + uintptr_t (63)) / 64 * 64);
|
||||
char *data = (char*) ( ( uintptr_t (editIFloatBuffer) + uintptr_t (63)) / 64 * 64);
|
||||
|
||||
editIFloatTmpR = (float (*))data;
|
||||
editIFloatTmpG = (float (*)) ((char*)editIFloatTmpR + sizeof (float) * TS * TS + 4 * 64);
|
||||
@@ -3442,7 +3440,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
||||
|
||||
if (editWhatever) {
|
||||
editWhateverBuffer = (char *) malloc (sizeof (float) * TS * TS + 20 * 64 + 63);
|
||||
data = (char*) ( ( uintptr_t (editWhateverBuffer) + uintptr_t (63)) / 64 * 64);
|
||||
char *data = (char*) ( ( uintptr_t (editWhateverBuffer) + uintptr_t (63)) / 64 * 64);
|
||||
|
||||
editWhateverTmp = (float (*))data;
|
||||
}
|
||||
@@ -3618,10 +3616,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
||||
if (hasToneCurve1) {
|
||||
if (curveMode == ToneCurveParams::TcMode::STD) { // Standard
|
||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||
for (int j = jstart, tj = 0; j < tW; j++, tj++) {
|
||||
const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve1);
|
||||
userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
|
||||
}
|
||||
const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve1);
|
||||
userToneCurve.BatchApply (
|
||||
0, tW - jstart,
|
||||
&rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS]);
|
||||
}
|
||||
} else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like
|
||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||
@@ -4529,8 +4527,6 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
||||
}
|
||||
}
|
||||
|
||||
free (buffer);
|
||||
|
||||
if (editIFloatBuffer) {
|
||||
free (editIFloatBuffer);
|
||||
}
|
||||
|
Reference in New Issue
Block a user