378 lines
9.7 KiB
C++

/*
* LUT.h
* This file is part of RawTherapee.
*
* Copyright (c) 2011 Jan Rinze Peterzon (janrinze@gmail.com)
*
* RawTherapee is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* RawTherapee is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Declaration of flexible Lookup Tables
*
* Usage:
*
* LUT<type> name (size);
* LUT<type> name (size, flags);
*
* creates an array which is valid within the normal C/C++ scope "{ ... }"
*
* access to elements is as simple as:
*
* LUT<float> my_lut (10);
* float value = my_lut[3];
* float value = my_lut[2.5f]; // this will interpolate (use a float literal: a double index is ambiguous between the int and float overloads)
*
* when using a float type index it will interpolate the lookup values
*
* extra setting in flags: (clipping is set by default)
* LUT_CLIP_ABOVE
* LUT_CLIP_BELOW
*
* example:
* LUT<float> my_lut (10,LUT_CLIP_BELOW);
* float value = my_lut[22.5f]; // this will extrapolate
* float value = my_lut[-22.5f]; // this will not extrapolate
*
* LUT<float> my_lut (10,0); // this will extrapolate on either side
*
* shortcuts:
*
* LUTf stands for LUT<float>
* LUTi stands for LUT<int>
* LUTu stands for LUT<unsigned int>
* LUTd stands for LUT<double>
*/
#ifndef LUT_H_
#define LUT_H_
// bit representations of flags
#define LUT_CLIP_BELOW 1
#define LUT_CLIP_ABOVE 2
#define LUTf LUT<float>
#define LUTi LUT<int>
#define LUTu LUT<unsigned int>
#define LUTd LUT<double>
#include <cstring>
#ifndef NDEBUG
#include <glibmm.h>
#include <fstream>
#endif
#ifdef __SSE2__
#include "sleefsseavx.c"
#endif
#include <assert.h>
#include "rt_math.h"
template<typename T>
class LUT {
private:
// list of variables ordered to improve cache speed
unsigned int maxs;
T * data;
unsigned int clip, size, owner;
#if defined( __SSE2__ ) && defined( __x86_64__ )
__m128 maxsv __attribute__ ((aligned (16)));
__m128 sizev __attribute__ ((aligned (16)));
__m128i maxsiv __attribute__ ((aligned (16)));
__m128i sizeiv __attribute__ ((aligned (16)));
#endif
public:
/// convenience flag! If one doesn't want to delete the buffer but want to flag it to be recomputed...
/// The user have to handle it itself, even if some method can (re)initialize it
bool dirty;
LUT(int s, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
#endif
dirty = true;
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( maxs );
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
}
void operator ()(int s, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
#endif
if (owner&&data)
delete[] data;
dirty = true; // Assumption!
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( maxs );
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
}
LUT(int s, T * source, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
if (source==NULL)
printf("source is NULL!\n");
assert (source != NULL);
#endif
dirty = false; // Assumption
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( size - 2);
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
for (int i = 0; i < s; i++) {
data[i] = source[i];
}
}
LUT() {
data = NULL;
reset();
}
~LUT() {
if (owner) {
delete[] data;
#ifndef NDEBUG
data=(T*)0xBAADF00D;
#endif
}
}
void setClip(int flags) {
clip = flags;
}
LUT<T> & operator=(LUT<T> &rhs) {
if (this != &rhs) {
if (rhs.size>this->size)
{
delete [] this->data;
this->data=NULL;
}
if (this->data==NULL) this->data=new T[rhs.size];
this->clip=rhs.clip;
this->owner=1;
memcpy(this->data,rhs.data,rhs.size*sizeof(T));
this->size=rhs.size;
this->maxs=this->size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
this->maxsv = _mm_set1_ps( this->size - 2);
this->maxsiv = _mm_cvttps_epi32( this->maxsv );
this->sizeiv = _mm_set1_epi32( (int)(this->size-1) );
this->sizev = _mm_set1_ps( this->size-1 );
#endif
}
return *this;
}
// use with integer indices
T& operator[](int index) const {
return data[ rtengine::LIM<int>(index, 0, size-1) ];
}
#if defined( __SSE2__ ) && defined( __x86_64__ )
__m128 operator[](__m128 indexv ) const {
printf("don't use this operator. It's not ready for production");
return _mm_setzero_ps();
// convert floats to ints
__m128i idxv = _mm_cvttps_epi32( indexv );
__m128 tempv, resultv, p1v, p2v;
vmask maxmask = vmaskf_gt(indexv, maxsv);
idxv = _mm_castps_si128(vself(maxmask, maxsv, _mm_castsi128_ps(idxv)));
vmask minmask = vmaskf_lt(indexv, _mm_setzero_ps());
idxv = _mm_castps_si128(vself(minmask, _mm_setzero_ps(), _mm_castsi128_ps(idxv)));
// access the LUT 4 times and shuffle the values into p1v and p2v
int idx;
// get 4th value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(3,3,3,3)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(0,0,0,0));
p2v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
// now p1v is 3 3 3 3
// p2v is 3 3 3 3
// get 3rd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(2,2,2,2)));
tempv = LVFU(data[idx]);
p1v = _mm_move_ss( p1v, tempv);
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p1v is 3 3 3 2
// p2v is 3 3 3 2
// get 2nd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(1,1,1,1)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(1,0,1,0));
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(1,0,1,0));
// now p1v is 3 2 3 2
// now p2v is 3 2 3 2
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 3 1
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p1v is 3 2 3 1
// get 1st value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(0,0,0,0)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(3,2,0,0));
// now p1v is 3 2 1 1
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(3,2,0,0));
// now p2v is 3 2 1 1
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 1 0
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p2v is 3 2 1 0
__m128 diffv = indexv - _mm_cvtepi32_ps ( idxv );
diffv = vself(vorm(maxmask,minmask), _mm_setzero_ps(), diffv);
resultv = p1v + p2v * diffv;
return resultv ;
}
__m128 operator[](__m128i idxv ) const
{
__m128 tempv, p1v;
tempv = _mm_cvtepi32_ps(idxv);
tempv = _mm_min_ps( tempv, sizev );
idxv = _mm_cvttps_epi32(_mm_max_ps( tempv, _mm_setzero_ps( ) ));
// access the LUT 4 times and shuffle the values into p1v
int idx;
// get 4th value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(3,3,3,3)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(0,0,0,0));
// now p1v is 3 3 3 3
// get 3rd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(2,2,2,2)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_move_ss( p1v, tempv);
// now p1v is 3 3 3 2
// get 2nd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(1,1,1,1)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(1,0,1,0));
// now p1v is 3 2 3 2
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 3 1
// get 1st value
idx = _mm_cvtsi128_si32 (idxv);
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(3,2,0,0));
// now p1v is 3 2 1 1
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 1 0
return p1v;
}
#endif
// use with float indices
T operator[](float index) const {
int idx = (int)index; // don't use floor! The difference in negative space is no problems here
if (index<0.f)
{
if (clip & LUT_CLIP_BELOW)
return data[0];
idx=0;
}
else if (index > float(maxs))
{
if (clip & LUT_CLIP_ABOVE)
return data[size - 1];
idx =maxs;
}
float diff = index - (float) idx;
T p1 = data[idx];
T p2 = data[idx + 1]-p1;
return (p1 + p2*diff);
}
#ifndef NDEBUG
// Debug facility ; dump the content of the LUT in a file. No control of the filename is done
void dump(Glib::ustring fname) {
if (size) {
Glib::ustring fname_ = fname + ".xyz"; // TopSolid'Design "plot" file format
std::ofstream f (fname_.c_str());
f << "$" << std::endl;
for (unsigned int iter=0; iter<size; iter++) {
f << iter << ", " << data[iter] << ", 0." << std::endl;
}
f << "$" << std::endl;
f.close ();
}
}
#endif
operator bool (void) const
{
return size>0;
}
void clear(void) {
if (data && size)
memset(data, 0, size * sizeof(T));
}
void reset(void) {
if (data) delete[] data;
dirty = true;
data = NULL;
owner = 1;
size = 0;
maxs=0;
}
};
#endif /* LUT_H_ */