Files
rawTherapee/rtengine/LUT.h
Hombre 8b2eac9a3d Pipette and "On Preview Widgets" branch. See issue 227
The pipette part is already working quite nice but need to be finished. The widgets part needs more work...
2014-01-21 23:37:36 +01:00

387 lines
9.8 KiB
C++

/*
* LUT.h
* This file is part of RawTherapee.
*
* Copyright (c) 2011 Jan Rinze Peterzon (janrinze@gmail.com)
*
* RawTherapee is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* RawTherapee is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Declaration of flexible Lookup Tables
*
* Usage:
*
* LUT<type> name (size);
* LUT<type> name (size, flags);
*
* creates an array which is valid within the normal C/C++ scope "{ ... }"
*
* access to elements is as simple as:
*
* LUT<float> my_lut (10);
* float value = my_lut[3];
* float value = my_lut[2.5]; // this will interpolate
*
* when using a float type index it will interpolate the lookup values
*
* extra setting in flags: (clipping is set by default)
* LUT_CLIP_ABOVE
* LUT_CLIP_BELOW
*
* example:
* LUT<float> my_lut (10,LUT_CLIP_BELOW);
* float value = my_lut[22.5]; // this will extrapolate
* float value = my_lut[-22.5]; // this will not extrapolate
*
* LUT<float> my_lut (10,0); // this will extrapolate on either side
*
* shortcuts:
*
* LUTf stands for LUT<float>
* LUTi stands for LUT<int>
* LUTu stands for LUT<unsigned int>
* LUTd stands for LUT<double>
*/
#ifndef LUT_H_
#define LUT_H_
// bit representations of flags
// These bits are stored in LUT::clip and are consulted by the float-index
// operator[] only.
// LUT_CLIP_BELOW: a float index that truncates to a negative integer returns
// the first entry instead of being extrapolated.
#define LUT_CLIP_BELOW 1
// LUT_CLIP_ABOVE: a float index at or beyond the last entry returns the last
// entry instead of being extrapolated.
#define LUT_CLIP_ABOVE 2
// Shorthand names for the common instantiations of the LUT template.
#define LUTf LUT<float>
#define LUTi LUT<int>
#define LUTu LUT<unsigned int>
#define LUTd LUT<double>
#include <cstring>
#ifndef NDEBUG
#include <glibmm.h>
#include <fstream>
#endif
#ifdef __SSE2__
#include "sleefsseavx.c"
#endif
#include <assert.h>
template<typename T>
class LUT {
private:
// list of variables ordered to improve cache speed
unsigned int maxs;
T * data;
unsigned int clip, size, owner;
#if defined( __SSE2__ ) && defined( __x86_64__ )
__m128 maxsv __attribute__ ((aligned (16)));
__m128 sizev __attribute__ ((aligned (16)));
__m128i maxsiv __attribute__ ((aligned (16)));
__m128i sizeiv __attribute__ ((aligned (16)));
#endif
public:
/// convenience flag! If one doesn't want to delete the buffer but want to flag it to be recomputed...
/// The user have to handle it itself, even if some method can (re)initialize it
bool dirty;
LUT(int s, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
#endif
dirty = true;
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( maxs );
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
}
void operator ()(int s, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
#endif
if (owner&&data)
delete[] data;
dirty = true; // Assumption!
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( maxs );
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
}
LUT(int s, T * source, int flags = 0xfffffff) {
#ifndef NDEBUG
if (s<=0)
printf("s<=0!\n");
assert (s>0);
if (source==NULL)
printf("source is NULL!\n");
assert (source != NULL);
#endif
dirty = false; // Assumption
clip = flags;
data = new T[s];
owner = 1;
size = s;
maxs=size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
maxsv = _mm_set1_ps( size - 2);
maxsiv = _mm_cvttps_epi32( maxsv );
sizeiv = _mm_set1_epi32( (int)(size-1) );
sizev = _mm_set1_ps( size-1 );
#endif
for (int i = 0; i < s; i++) {
data[i] = source[i];
}
}
LUT() {
data = NULL;
reset();
}
~LUT() {
if (owner) {
delete[] data;
#ifndef NDEBUG
data=(T*)0xBAADF00D;
#endif
}
}
void setClip(int flags) {
clip = flags;
}
LUT<T> & operator=(LUT<T> &rhs) {
if (this != &rhs) {
if (rhs.size>this->size)
{
delete [] this->data;
this->data=NULL;
}
if (this->data==NULL) this->data=new T[rhs.size];
this->clip=rhs.clip;
this->owner=1;
memcpy(this->data,rhs.data,rhs.size*sizeof(T));
this->size=rhs.size;
this->maxs=this->size-2;
#if defined( __SSE2__ ) && defined( __x86_64__ )
this->maxsv = _mm_set1_ps( this->size - 2);
this->maxsiv = _mm_cvttps_epi32( this->maxsv );
this->sizeiv = _mm_set1_epi32( (int)(this->size-1) );
this->sizev = _mm_set1_ps( this->size-1 );
#endif
}
return *this;
}
// use with integer indices
T& operator[](int index) const {
if (((unsigned int)index)<size) return data[index];
else
{
if (index < 0)
return data[0];
else
return data[size - 1];
}
}
#if defined( __SSE2__ ) && defined( __x86_64__ )
__m128 operator[](__m128 indexv ) const {
printf("don't use this operator. It's not ready for production");
return _mm_setzero_ps();
// convert floats to ints
__m128i idxv = _mm_cvttps_epi32( indexv );
__m128 tempv, resultv, p1v, p2v;
vmask maxmask = vmaskf_gt(indexv, maxsv);
idxv = _mm_castps_si128(vself(maxmask, maxsv, _mm_castsi128_ps(idxv)));
vmask minmask = vmaskf_lt(indexv, _mm_setzero_ps());
idxv = _mm_castps_si128(vself(minmask, _mm_setzero_ps(), _mm_castsi128_ps(idxv)));
// access the LUT 4 times and shuffle the values into p1v and p2v
int idx;
// get 4th value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(3,3,3,3)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(0,0,0,0));
p2v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
// now p1v is 3 3 3 3
// p2v is 3 3 3 3
// get 3rd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(2,2,2,2)));
tempv = LVFU(data[idx]);
p1v = _mm_move_ss( p1v, tempv);
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p1v is 3 3 3 2
// p2v is 3 3 3 2
// get 2nd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(1,1,1,1)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(1,0,1,0));
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(1,0,1,0));
// now p1v is 3 2 3 2
// now p2v is 3 2 3 2
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 3 1
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p1v is 3 2 3 1
// get 1st value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(0,0,0,0)));
tempv = LVFU(data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(3,2,0,0));
// now p1v is 3 2 1 1
p2v = _mm_shuffle_ps( p2v, p2v, _MM_SHUFFLE(3,2,0,0));
// now p2v is 3 2 1 1
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 1 0
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
p2v = _mm_move_ss( p2v, tempv);
// now p2v is 3 2 1 0
__m128 diffv = indexv - _mm_cvtepi32_ps ( idxv );
diffv = vself(vorm(maxmask,minmask), _mm_setzero_ps(), diffv);
resultv = p1v + p2v * diffv;
return resultv ;
}
__m128 operator[](__m128i idxv ) const
{
__m128 tempv, p1v;
tempv = _mm_cvtepi32_ps(idxv);
tempv = _mm_min_ps( tempv, sizev );
idxv = _mm_cvttps_epi32(_mm_max_ps( tempv, _mm_setzero_ps( ) ));
// access the LUT 4 times and shuffle the values into p1v
int idx;
// get 4th value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(3,3,3,3)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(0,0,0,0));
// now p1v is 3 3 3 3
// get 3rd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(2,2,2,2)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_move_ss( p1v, tempv);
// now p1v is 3 3 3 2
// get 2nd value
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(1,1,1,1)));
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(1,0,1,0));
// now p1v is 3 2 3 2
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 3 1
// get 1st value
idx = _mm_cvtsi128_si32 (idxv);
tempv = _mm_load_ss(&data[idx]);
p1v = _mm_shuffle_ps( p1v, p1v, _MM_SHUFFLE(3,2,0,0));
// now p1v is 3 2 1 1
p1v = _mm_move_ss( p1v, tempv );
// now p1v is 3 2 1 0
return p1v;
}
#endif
// use with float indices
T operator[](float index) const {
int idx = (int)index; // don't use floor! The difference in negative space is no problems here
if (((unsigned int)idx) > maxs) {
if (idx<0)
{
if (clip & LUT_CLIP_BELOW)
return data[0];
idx=0;
}
else
{
if (clip & LUT_CLIP_ABOVE)
return data[size - 1];
idx =maxs;
}
}
float diff = index - (float) idx;
T p1 = data[idx];
T p2 = data[idx + 1]-p1;
return (p1 + p2*diff);
}
#ifndef NDEBUG
// Debug facility ; dump the content of the LUT in a file. No control of the filename is done
void dump(Glib::ustring fname) {
if (size) {
Glib::ustring fname_ = fname + ".xyz"; // TopSolid'Design "plot" file format
std::ofstream f (fname_.c_str());
f << "$" << std::endl;
for (unsigned int iter=0; iter<size; iter++) {
f << iter << ", " << data[iter] << ", 0." << std::endl;
}
f << "$" << std::endl;
f.close ();
}
}
#endif
operator bool (void) const
{
return size>0;
}
void clear(void) {
if (data && size)
memset(data, 0, size * sizeof(T));
}
void reset(void) {
if (data) delete[] data;
dirty = true;
data = NULL;
owner = 1;
size = 0;
maxs=0;
}
};
#endif /* LUT_H_ */