605 lines
16 KiB
C++
605 lines
16 KiB
C++
/*
|
|
* LUT.h
|
|
* This file is part of RawTherapee.
|
|
*
|
|
* Copyright (c) 2011 Jan Rinze Peterzon (janrinze@gmail.com)
|
|
*
|
|
* RawTherapee is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* RawTherapee is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/*
|
|
* Declaration of flexible Lookup Tables
|
|
*
|
|
* Usage:
|
|
*
|
|
* LUT<type> name (size);
|
|
* LUT<type> name (size, flags);
|
|
*
|
|
* creates an array which is valid within the normal C/C++ scope "{ ... }"
|
|
*
|
|
* access to elements is as simple as:
|
|
*
|
|
* LUT<float> my_lut (10);
|
|
* float value = my_lut[3];
|
|
* float value = my_lut[2.5f]; // this will interpolate (use a float literal: a double index is ambiguous between the int and float overloads)
|
|
*
|
|
* when using a float type index it will interpolate the lookup values
|
|
*
|
|
* extra setting in flags: (clipping is set by default)
|
|
* LUT_CLIP_ABOVE
|
|
* LUT_CLIP_BELOW
|
|
*
|
|
* example:
|
|
* LUT<float> my_lut (10,LUT_CLIP_BELOW);
|
|
* float value = my_lut[22.5f]; // this will extrapolate
|
|
* float value = my_lut[-22.5f]; // this will not extrapolate
|
|
*
|
|
* LUT<float> my_lut (10,0); // this will extrapolate on either side
|
|
*
|
|
* shortcuts:
|
|
*
|
|
* LUTf stands for LUT<float>
|
|
* LUTi stands for LUT<int>
|
|
* LUTu stands for LUT<unsigned int>
|
|
* LUTd stands for LUT<double>
|
|
* LUTuc stands for LUT<unsigned char>
|
|
*/
|
|
|
|
#ifndef LUT_H_
|
|
#define LUT_H_
|
|
|
|
// Bit values of the LUT "flags" argument; they may be OR-ed together.
// The float lookup tests them against the stored flags with a bitwise AND.
|
|
// When set, a float index below 0 returns the first entry instead of extrapolating.
#define LUT_CLIP_BELOW 1
|
|
// When set, a float index above the interpolation range returns the last entry instead of extrapolating.
#define LUT_CLIP_ABOVE 2
|
|
|
|
// Shorthand names for the LUT instantiations listed in the usage notes above.
#define LUTf LUT<float>
|
|
#define LUTi LUT<int>
|
|
#define LUTu LUT<unsigned int>
|
|
#define LUTd LUT<double>
|
|
#define LUTuc LUT<unsigned char>
|
|
|
|
#include <cstring>
|
|
#ifndef NDEBUG
|
|
#include <glibmm.h>
|
|
#include <fstream>
|
|
#endif
|
|
#include "opthelper.h"
|
|
#include <assert.h>
|
|
#include "rt_math.h"
|
|
|
|
template<typename T>
|
|
class LUT
|
|
{
|
|
protected:
|
|
// list of variables ordered to improve cache speed
|
|
unsigned int maxs;
|
|
float maxsf;
|
|
T * data;
|
|
unsigned int clip;
|
|
unsigned int size;
|
|
unsigned int upperBound; // always equals size-1, parameter created for performance reason
|
|
private:
|
|
unsigned int owner;
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
vfloat maxsv ALIGNED16;
|
|
vfloat sizev ALIGNED16;
|
|
vint sizeiv ALIGNED16;
|
|
#endif
|
|
public:
|
|
/// convenience flag! If one doesn't want to delete the buffer but want to flag it to be recomputed...
|
|
/// The user have to handle it itself, even if some method can (re)initialize it
|
|
bool dirty;
|
|
|
|
LUT(int s, int flags = 0xfffffff)
|
|
{
|
|
#ifndef NDEBUG
|
|
|
|
if (s <= 0) {
|
|
printf("s<=0!\n");
|
|
}
|
|
|
|
assert (s > 0);
|
|
#endif
|
|
dirty = true;
|
|
clip = flags;
|
|
data = new T[s];
|
|
owner = 1;
|
|
size = s;
|
|
upperBound = size - 1;
|
|
maxs = size - 2;
|
|
maxsf = (float)maxs;
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
maxsv = F2V( maxs );
|
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
|
sizev = F2V( size - 1 );
|
|
#endif
|
|
}
|
|
void operator ()(int s, int flags = 0xfffffff)
|
|
{
|
|
#ifndef NDEBUG
|
|
|
|
if (s <= 0) {
|
|
printf("s<=0!\n");
|
|
}
|
|
|
|
assert (s > 0);
|
|
#endif
|
|
|
|
if (owner && data) {
|
|
delete[] data;
|
|
}
|
|
|
|
dirty = true; // Assumption!
|
|
clip = flags;
|
|
data = new T[s];
|
|
owner = 1;
|
|
size = s;
|
|
upperBound = size - 1;
|
|
maxs = size - 2;
|
|
maxsf = (float)maxs;
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
maxsv = F2V( maxs );
|
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
|
sizev = F2V( size - 1 );
|
|
#endif
|
|
}
|
|
|
|
LUT(int s, T * source, int flags = 0xfffffff)
|
|
{
|
|
#ifndef NDEBUG
|
|
|
|
if (s <= 0) {
|
|
printf("s<=0!\n");
|
|
}
|
|
|
|
assert (s > 0);
|
|
|
|
if (!source) {
|
|
printf("source is NULL!\n");
|
|
}
|
|
|
|
assert (source != nullptr);
|
|
#endif
|
|
dirty = false; // Assumption
|
|
clip = flags;
|
|
data = new T[s];
|
|
owner = 1;
|
|
size = s;
|
|
upperBound = size - 1;
|
|
maxs = size - 2;
|
|
maxsf = (float)maxs;
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
maxsv = F2V( size - 2);
|
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
|
sizev = F2V( size - 1 );
|
|
#endif
|
|
|
|
for (int i = 0; i < s; i++) {
|
|
data[i] = source[i];
|
|
}
|
|
}
|
|
|
|
LUT()
|
|
{
|
|
data = nullptr;
|
|
reset();
|
|
}
|
|
|
|
~LUT()
|
|
{
|
|
if (owner) {
|
|
delete[] data;
|
|
#ifndef NDEBUG
|
|
data = (T*)0xBAADF00D;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void setClip(int flags)
|
|
{
|
|
clip = flags;
|
|
}
|
|
|
|
/** @brief Get the number of element in the LUT (i.e. dimension of the array)
|
|
* For a LUT(500), it will return 500
|
|
* @return number of element in the array
|
|
*/
|
|
int getSize()
|
|
{
|
|
return size;
|
|
}
|
|
|
|
/** @brief Get the highest value possible (i.e. dimension of the array)
|
|
* For a LUT(500), it will return 499, because 500 elements, starting from 0, goes up to 499
|
|
* @return number of element in the array
|
|
*/
|
|
int getUpperBound()
|
|
{
|
|
return size > 0 ? upperBound : 0;
|
|
}
|
|
|
|
LUT<T> & operator=(LUT<T> &rhs)
|
|
{
|
|
if (this != &rhs) {
|
|
if (rhs.size > this->size) {
|
|
delete [] this->data;
|
|
this->data = nullptr;
|
|
}
|
|
|
|
if (this->data == nullptr) {
|
|
this->data = new T[rhs.size];
|
|
}
|
|
|
|
this->clip = rhs.clip;
|
|
this->owner = 1;
|
|
memcpy(this->data, rhs.data, rhs.size * sizeof(T));
|
|
this->size = rhs.size;
|
|
this->upperBound = rhs.upperBound;
|
|
this->maxs = this->size - 2;
|
|
this->maxsf = (float)this->maxs;
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
this->maxsv = F2V( this->size - 2);
|
|
this->sizeiv = _mm_set1_epi32( (int)(this->size - 1) );
|
|
this->sizev = F2V( this->size - 1 );
|
|
#endif
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
// handy to sum up per thread histograms. #pragma omp simd speeds up the loop by about factor 3 for LUTu (unsigned int).
|
|
LUT<T> & operator+=(LUT<T> &rhs)
|
|
{
|
|
if (rhs.size == this->size) {
|
|
#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324
|
|
#pragma omp simd
|
|
#endif
|
|
|
|
for(unsigned int i = 0; i < this->size; i++) {
|
|
data[i] += rhs.data[i];
|
|
}
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
// use with integer indices
|
|
T& operator[](int index) const
|
|
{
|
|
return data[ rtengine::LIM<int>(index, 0, upperBound) ];
|
|
}
|
|
|
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
|
/*
|
|
vfloat operator[](vfloat indexv ) const
|
|
{
|
|
// printf("don't use this operator. It's not ready for production");
|
|
return _mm_setzero_ps();
|
|
|
|
// convert floats to ints
|
|
vint idxv = _mm_cvttps_epi32( indexv );
|
|
vfloat tempv, resultv, p1v, p2v;
|
|
vmask maxmask = vmaskf_gt(indexv, maxsv);
|
|
idxv = _mm_castps_si128(vself(maxmask, maxsv, _mm_castsi128_ps(idxv)));
|
|
vmask minmask = vmaskf_lt(indexv, _mm_setzero_ps());
|
|
idxv = _mm_castps_si128(vself(minmask, _mm_setzero_ps(), _mm_castsi128_ps(idxv)));
|
|
// access the LUT 4 times and shuffle the values into p1v and p2v
|
|
|
|
int idx;
|
|
|
|
// get 4th value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(3, 3, 3, 3)));
|
|
tempv = LVFU(data[idx]);
|
|
p1v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(0, 0, 0, 0));
|
|
p2v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1, 1, 1, 1));
|
|
// now p1v is 3 3 3 3
|
|
// p2v is 3 3 3 3
|
|
|
|
// get 3rd value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(2, 2, 2, 2)));
|
|
tempv = LVFU(data[idx]);
|
|
p1v = _mm_move_ss( p1v, tempv);
|
|
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1, 1, 1, 1));
|
|
p2v = _mm_move_ss( p2v, tempv);
|
|
// now p1v is 3 3 3 2
|
|
// p2v is 3 3 3 2
|
|
|
|
// get 2nd value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
tempv = LVFU(data[idx]);
|
|
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(1, 0, 1, 0));
|
|
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(1, 0, 1, 0));
|
|
// now p1v is 3 2 3 2
|
|
// now p2v is 3 2 3 2
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 3 1
|
|
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1, 1, 1, 1));
|
|
p2v = _mm_move_ss( p2v, tempv);
|
|
// now p1v is 3 2 3 1
|
|
|
|
// get 1st value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(0, 0, 0, 0)));
|
|
tempv = LVFU(data[idx]);
|
|
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(3, 2, 0, 0));
|
|
// now p1v is 3 2 1 1
|
|
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(3, 2, 0, 0));
|
|
// now p2v is 3 2 1 1
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 1 0
|
|
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1, 1, 1, 1));
|
|
p2v = _mm_move_ss( p2v, tempv);
|
|
// now p2v is 3 2 1 0
|
|
|
|
vfloat diffv = indexv - _mm_cvtepi32_ps ( idxv );
|
|
diffv = vself(vorm(maxmask, minmask), _mm_setzero_ps(), diffv);
|
|
resultv = p1v + p2v * diffv;
|
|
return resultv ;
|
|
}
|
|
*/
|
|
#ifdef __SSE4_1__
|
|
vfloat operator[](vint idxv ) const
|
|
{
|
|
vfloat tempv, p1v;
|
|
idxv = _mm_max_epi32( _mm_setzero_si128(), _mm_min_epi32(idxv, sizeiv));
|
|
// access the LUT 4 times and shuffle the values into p1v
|
|
|
|
int idx;
|
|
|
|
// get 4th value
|
|
idx = _mm_extract_epi32(idxv, 3);
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS(tempv, _MM_SHUFFLE(0, 0, 0, 0));
|
|
// now p1v is 3 3 3 3
|
|
|
|
// get 3rd value
|
|
idx = _mm_extract_epi32(idxv, 2);
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = _mm_move_ss( p1v, tempv);
|
|
// now p1v is 3 3 3 2
|
|
|
|
// get 2nd value
|
|
idx = _mm_extract_epi32(idxv, 1);
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS( p1v, _MM_SHUFFLE(1, 0, 1, 0));
|
|
// now p1v is 3 2 3 2
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 3 1
|
|
|
|
// get 1st value
|
|
idx = _mm_cvtsi128_si32(idxv);
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS( p1v, _MM_SHUFFLE(3, 2, 0, 0));
|
|
// now p1v is 3 2 1 1
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 1 0
|
|
|
|
return p1v;
|
|
}
|
|
#else
|
|
vfloat operator[](vint idxv ) const
|
|
{
|
|
vfloat tempv, p1v;
|
|
tempv = _mm_cvtepi32_ps(idxv);
|
|
tempv = _mm_min_ps( tempv, sizev );
|
|
idxv = _mm_cvttps_epi32(_mm_max_ps( tempv, _mm_setzero_ps( ) ));
|
|
// access the LUT 4 times and shuffle the values into p1v
|
|
|
|
int idx;
|
|
|
|
// get 4th value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(3, 3, 3, 3)));
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS(tempv, _MM_SHUFFLE(0, 0, 0, 0));
|
|
// now p1v is 3 3 3 3
|
|
|
|
// get 3rd value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(2, 2, 2, 2)));
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = _mm_move_ss( p1v, tempv);
|
|
// now p1v is 3 3 3 2
|
|
|
|
// get 2nd value
|
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS( p1v, _MM_SHUFFLE(1, 0, 1, 0));
|
|
// now p1v is 3 2 3 2
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 3 1
|
|
|
|
// get 1st value
|
|
idx = _mm_cvtsi128_si32 (idxv);
|
|
tempv = _mm_load_ss(&data[idx]);
|
|
p1v = PERMUTEPS( p1v, _MM_SHUFFLE(3, 2, 0, 0));
|
|
// now p1v is 3 2 1 1
|
|
p1v = _mm_move_ss( p1v, tempv );
|
|
// now p1v is 3 2 1 0
|
|
|
|
return p1v;
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
// use with float indices
|
|
T operator[](float index) const
|
|
{
|
|
int idx = (int)index; // don't use floor! The difference in negative space is no problems here
|
|
|
|
if (index < 0.f) {
|
|
if (clip & LUT_CLIP_BELOW) {
|
|
return data[0];
|
|
}
|
|
|
|
idx = 0;
|
|
} else if (index > maxsf) {
|
|
if (clip & LUT_CLIP_ABOVE) {
|
|
return data[upperBound];
|
|
}
|
|
|
|
idx = maxs;
|
|
}
|
|
|
|
float diff = index - (float) idx;
|
|
T p1 = data[idx];
|
|
T p2 = data[idx + 1] - p1;
|
|
return (p1 + p2 * diff);
|
|
}
|
|
|
|
// Return the value for "index" that is in the [0-1] range.
|
|
T getVal01 (float index) const
|
|
{
|
|
index *= float(upperBound);
|
|
int idx = (int)index; // don't use floor! The difference in negative space is no problems here
|
|
|
|
if (index < 0.f) {
|
|
if (clip & LUT_CLIP_BELOW) {
|
|
return data[0];
|
|
}
|
|
|
|
idx = 0;
|
|
} else if (index > maxsf) {
|
|
if (clip & LUT_CLIP_ABOVE) {
|
|
return data[upperBound];
|
|
}
|
|
|
|
idx = maxs;
|
|
}
|
|
|
|
float diff = index - (float) idx;
|
|
T p1 = data[idx];
|
|
T p2 = data[idx + 1] - p1;
|
|
return (p1 + p2 * diff);
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
// Debug facility ; dump the content of the LUT in a file. No control of the filename is done
|
|
void dump(Glib::ustring fname)
|
|
{
|
|
if (size) {
|
|
Glib::ustring fname_ = fname + ".xyz"; // TopSolid'Design "plot" file format
|
|
std::ofstream f (fname_.c_str());
|
|
f << "$" << std::endl;
|
|
|
|
for (unsigned int iter = 0; iter < size; iter++) {
|
|
f << iter << ", " << data[iter] << ", 0." << std::endl;
|
|
}
|
|
|
|
f << "$" << std::endl;
|
|
f.close ();
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
operator bool (void) const
|
|
{
|
|
return size > 0;
|
|
}
|
|
|
|
void clear(void)
|
|
{
|
|
if (data && size) {
|
|
memset(data, 0, size * sizeof(T));
|
|
}
|
|
}
|
|
|
|
void reset(void)
|
|
{
|
|
if (data) {
|
|
delete[] data;
|
|
}
|
|
|
|
dirty = true;
|
|
data = nullptr;
|
|
owner = 1;
|
|
size = 0;
|
|
upperBound = 0;
|
|
maxs = 0;
|
|
}
|
|
};
|
|
|
|
|
|
|
|
// TODO: HOMBRE: HueLUT is actually unused, could we delete this class now that LUT::getVal01 has been created?
|
|
|
|
|
|
/** @brief LUT subclass handling hue values specifically.
|
|
The array has a fixed size of float values and have to be in the [0.; 1.] range in both axis (no error checking implemented) */
|
|
class HueLUT : public LUTf
|
|
{
|
|
public:
|
|
HueLUT() : LUTf() {}
|
|
explicit HueLUT(bool createArray) : LUTf()
|
|
{
|
|
if (createArray) {
|
|
this->operator () (501, LUT_CLIP_BELOW | LUT_CLIP_ABOVE);
|
|
}
|
|
}
|
|
|
|
void create()
|
|
{
|
|
this->operator () (501, LUT_CLIP_BELOW | LUT_CLIP_ABOVE);
|
|
}
|
|
|
|
// use with integer indices
|
|
float& operator[](int index) const
|
|
{
|
|
return data[ rtengine::LIM<int>(index, 0, upperBound) ];
|
|
}
|
|
|
|
// use with float indices in the [0.;1.] range
|
|
float operator[](float index) const
|
|
{
|
|
int idx = int(index * 500.f); // don't use floor! The difference in negative space is no problems here
|
|
|
|
if (index < 0.f) {
|
|
return data[0];
|
|
} else if (index > 1.f) {
|
|
return data[upperBound];
|
|
}
|
|
|
|
float balance = index - float(idx / 500.f);
|
|
float h1 = data[idx];
|
|
float h2 = data[idx + 1];
|
|
|
|
if (h1 == h2) {
|
|
return h1;
|
|
}
|
|
|
|
if ((h1 > h2) && (h1 - h2 > 0.5f)) {
|
|
h1 -= 1.f;
|
|
float value = h1 + balance * (h2 - h1);
|
|
|
|
if (value < 0.f) {
|
|
value += 1.f;
|
|
}
|
|
|
|
return value;
|
|
} else if (h2 - h1 > 0.5f) {
|
|
h2 -= 1.f;
|
|
float value = h1 + balance * (h2 - h1);
|
|
|
|
if (value < 0.f) {
|
|
value += 1.f;
|
|
}
|
|
|
|
return value;
|
|
} else {
|
|
return h1 + balance * (h2 - h1);
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
#endif /* LUT_H_ */
|