diff --git a/AUTHORS.txt b/AUTHORS.txt index 8c87a42c3..37ac98993 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -12,6 +12,7 @@ Developement contributors, in last name alphabetical order: Oliver Duis Maciek Dworak Michael Ezra + Flössie Jean-Christophe Frisch Ilias Giarimis Steve Herrell diff --git a/rtengine/alignedbuffer.h b/rtengine/alignedbuffer.h index a33d4dfe8..ac8471b7e 100644 --- a/rtengine/alignedbuffer.h +++ b/rtengine/alignedbuffer.h @@ -18,9 +18,10 @@ */ #ifndef _ALIGNEDBUFFER_ #define _ALIGNEDBUFFER_ -#include +#include #include #include +#include #include #include "../rtgui/threadutils.h" @@ -58,7 +59,7 @@ public: /** @brief Return true if there's no memory allocated */ - bool isEmpty() + bool isEmpty() const { return allocatedSize == 0; } @@ -120,28 +121,14 @@ public: void swap(AlignedBuffer &other) { - void *tmpReal = other.real; - other.real = real; - real = tmpReal; - - char tmpAlignt = other.alignment; - other.alignment = alignment; - alignment = tmpAlignt; - - size_t tmpAllocSize = other.allocatedSize; - other.allocatedSize = allocatedSize; - allocatedSize = tmpAllocSize; - - T* tmpData = other.data; - other.data = data; - data = tmpData; - - bool tmpInUse = other.inUse; - other.inUse = inUse; - inUse = tmpInUse; + std::swap(real, other.real); + std::swap(alignment, other.alignment); + std::swap(allocatedSize, other.allocatedSize); + std::swap(data, other.data); + std::swap(inUse, other.inUse); } - unsigned int getSize() + unsigned int getSize() const { return unitSize ? allocatedSize / unitSize : 0; } diff --git a/rtengine/cache.h b/rtengine/cache.h new file mode 100644 index 000000000..2e53aab2a --- /dev/null +++ b/rtengine/cache.h @@ -0,0 +1,236 @@ +/* + * This file is part of RawTherapee. + * + * Copyright (c) 2016 Flössie + * + * RawTherapee is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * RawTherapee is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with RawTherapee. If not, see . + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "../rtgui/threadutils.h" + +namespace rtengine +{ + +namespace cache_helper +{ + + // See http://stackoverflow.com/a/20790050 + template + struct has_hash + : std::false_type + { + }; + + template + struct has_hash()(std::declval()), void())> + : std::true_type + { + }; + +} + +template +class Cache +{ +public: + class Hook + { + public: + virtual ~Hook() + { + } + virtual void onDiscard(const K& key, const V& value) = 0; + virtual void onDisplace(const K& key, const V& value) = 0; + virtual void onRemove(const K& key, const V& value) = 0; + virtual void onDestroy() = 0; + }; + + Cache(unsigned long _size, Hook* _hook = 0) : + store_size(_size), + hook(_hook) + { + } + + ~Cache() + { + if (hook) { + resize(0); + hook->onDestroy(); + } + } + + bool get(const K& key, V& value) const + { + mutex.lock(); + const StoreConstIterator store_it = store.find(key); + const bool present = store_it != store.end(); + if (present) { + lru_list.splice( + lru_list.begin(), + lru_list, + store_it->second.lru_list_it + ); + value = store_it->second.value; + } + mutex.unlock(); + + return present; + } + + bool set(const K& key, const V& value) + { + return set(key, value, Mode::UNCOND); + } + + bool replace(const K& key, const V& value) + { + return set(key, value, Mode::KNOWN); + } + + bool insert(const K& key, const V& value) + { + return set(key, value, Mode::UNKNOWN); + } + + bool remove(const K& key) + { + mutex.lock(); + const StoreIterator store_it = store.find(key); + const bool present = store_it != store.end(); + if (present) { + remove(store_it); + } + mutex.unlock(); + + return present; + } + + void resize(unsigned long size) + { + mutex.lock(); + while (lru_list.size() > size) { + discard(); + } + store_size = size; + mutex.unlock(); + } + + void clear() + { + mutex.lock(); + if (hook) { + for (const auto& entry : store) { + hook->onRemove(entry.first, entry.second.value); + } + } + lru_list.clear(); + store.clear(); + mutex.unlock(); + } + +private: + struct Value; + + using Store = typename std::conditional< + cache_helper::has_hash::value, + std::unordered_map, + std::map + >::type; + using StoreIterator = typename Store::iterator; + using StoreConstIterator = typename Store::const_iterator; + + typedef std::list LruList; + using LruListIterator = typename LruList::iterator; + + struct Value { + V value; + LruListIterator lru_list_it; + }; + + enum class Mode { + UNCOND, + KNOWN, + UNKNOWN + }; + + void discard() + { + const StoreIterator store_it = lru_list.back(); + if (hook) { + hook->onDiscard(store_it->first, store_it->second.value); + } + store.erase(store_it); + lru_list.pop_back(); + } + + bool set(const K& key, const V& value, Mode mode) + { + mutex.lock(); + const StoreIterator store_it = store.find(key); + const bool is_new_key = store_it == store.end(); + if (is_new_key) { + if (mode == Mode::UNCOND || mode == Mode::UNKNOWN) { + if (lru_list.size() >= store_size) { + discard(); + } + lru_list.push_front(store.end()); + const Value v = { + value, + lru_list.begin() + }; + lru_list.front() = store.emplace(key, v).first; + } + } else { + if (mode == Mode::UNCOND || mode == Mode::KNOWN) { + if (hook) { + hook->onDisplace(key, store_it->second.value); + } + lru_list.splice( + lru_list.begin(), + lru_list, + store_it->second.lru_list_it + ); + store_it->second.value = value; + } + } + mutex.unlock(); + + return is_new_key; + } + + void remove(const StoreIterator& store_it) + { + if (hook) { + hook->onRemove(store_it->first, store_it->second.value); + } + lru_list.erase(store_it->second.lru_list_it); + store.erase(store_it); + } + + unsigned long store_size; + Hook* const hook; + mutable MyMutex mutex; + Store store; + mutable LruList lru_list; +}; + +} diff --git a/rtengine/clutstore.cc b/rtengine/clutstore.cc index a0ea5afb4..ea3a2a7fc 100644 --- a/rtengine/clutstore.cc +++ b/rtengine/clutstore.cc @@ -1,442 +1,309 @@ +#include + #include "clutstore.h" + +#include "opthelper.h" #include "rt_math.h" +#include "imagefloat.h" #include "stdimagesource.h" #include "../rtgui/options.h" -rtengine::CLUTStore clutStore; - -using namespace rtengine; - -const float MAXVAL8 = 255.; - -CLUTStore::CLUTStore() +namespace { -} -CLUT* CLUTStore::getClut( const Glib::ustring& filename ) +bool loadFile( + const Glib::ustring& filename, + const Glib::ustring& working_color_space, + AlignedBuffer& clut_image, + unsigned int& clut_level +) { - CLUT *result = 0; - m_mutex.lock(); - Cluts::iterator cluts_it = m_cluts.find(filename); + rtengine::StdImageSource img_src; - if (cluts_it == m_cluts.end()) { - if (m_cluts.size() >= options.clutCacheSize) { - // Evict a "random" entry from cache - Cluts::iterator victim_it = m_cluts.begin(); - - if (--victim_it->second.first == -1) { - delete victim_it->second.second; - m_cluts.erase(victim_it); - } - } - - cluts_it = m_cluts.insert(std::make_pair(filename, std::make_pair(0, new HaldCLUT))).first; - cluts_it->second.second->load( filename ); - } - - if (cluts_it->second.second->isValid()) { - result = cluts_it->second.second; - ++cluts_it->second.first; - } else { - delete cluts_it->second.second; - m_cluts.erase(cluts_it); - } - - m_mutex.unlock(); - - return result; -} - -void CLUTStore::releaseClut( const CLUT* clut ) -{ - m_mutex.lock(); - - for (Cluts::iterator cluts_it = m_cluts.begin(); cluts_it != m_cluts.end(); ++cluts_it) { - if (cluts_it->second.second == clut) { - if (--cluts_it->second.first == -1) { - delete cluts_it->second.second; - m_cluts.erase(cluts_it); - } - - break; - } - } - - m_mutex.unlock(); -} - -void CLUTStore::clearCache() -{ - m_mutex.lock(); - - for (Cluts::iterator cluts_it = m_cluts.begin(); cluts_it != m_cluts.end();) { - if (--cluts_it->second.first == -1) { - delete cluts_it->second.second; - Cluts::iterator tmp = cluts_it; - ++cluts_it; - m_cluts.erase(tmp); - } else { - ++cluts_it; - } - } - - m_mutex.unlock(); -} - -void rtengine::splitClutFilename( Glib::ustring filename, Glib::ustring &name, Glib::ustring &extension, Glib::ustring &profileName ) -{ - filename = Glib::path_get_basename( filename ); - name = filename; - //remove dirs - size_t lastSlashPos = filename.find_last_of( "/" ); - - if ( lastSlashPos == Glib::ustring::npos ) { - lastSlashPos = filename.find_last_of( "\\" ); - } - - size_t lastDotPos = filename.find_last_of( '.' ); - - if ( lastDotPos != Glib::ustring::npos ) { - name = filename.substr( 0, lastDotPos ); - extension = filename.substr( lastDotPos + 1, Glib::ustring::npos ); - } - - profileName = "sRGB"; // sRGB by default - static std::vector workingProfiles = rtengine::getWorkingProfiles(); - - for ( std::vector::iterator it = workingProfiles.begin(); it != workingProfiles.end(); ++it ) { - Glib::ustring ¤tProfile = *it; - - if ( std::search( name.rbegin(), name.rend(), currentProfile.rbegin(), currentProfile.rend() ) == name.rbegin() ) { - profileName = currentProfile; - name = name.substr( 0, name.size() - currentProfile.size() ); - break; - } - } -} - -//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -HaldCLUT::HaldCLUT() - : m_clutImage( 0 ), - m_level (0), - m_profile( "sRGB" ) -{ -} - -HaldCLUT::~HaldCLUT() -{ - if ( m_clutImage ) { - m_clutImage->free(); - m_clutImage = 0; - } -} - -void HaldCLUT::load( Glib::ustring filename ) -{ - m_clutImage = loadFile( filename, "", m_level ); - Glib::ustring name, ext; - splitClutFilename( filename, name, ext, m_profile ); - - if ( m_clutImage ) { - m_filename = filename; - } -} - -Glib::ustring HaldCLUT::profile() const -{ - return m_profile; -} - -Imagefloat* HaldCLUT::loadFile( Glib::ustring filename, Glib::ustring workingColorSpace, int &outLevel ) -{ - Imagefloat *result = 0; - StdImageSource imgSrc; - - if ( !Glib::file_test( filename, Glib::FILE_TEST_EXISTS ) || imgSrc.load(filename) ) { - return result; + if (!Glib::file_test(filename, Glib::FILE_TEST_EXISTS) || img_src.load(filename)) { + return false; } int fw, fh; - imgSrc.getFullSize (fw, fh, TR_NONE); + img_src.getFullSize(fw, fh, TR_NONE); - bool valid = false; + bool res = false; - //test on Hald format, copypasted from http://www.quelsolaar.com/technology/clut.html - if ( fw == fh ) { - outLevel = 1; + if (fw == fh) { + unsigned int level = 1; - for(; outLevel * outLevel * outLevel < fw; outLevel++); + while (level * level * level < fw) { + ++level; + } - if( !( outLevel * outLevel * outLevel > fw ) ) { - valid = true; + if (level * level * level == fw && level > 1) { + clut_level = level; + res = true; } } - if ( valid ) { - ColorTemp currWB = imgSrc.getWB(); - Imagefloat* baseImg = new Imagefloat (fw, fh); - PreviewProps pp (0, 0, fw, fh, 1); + if (res) { + rtengine::ColorTemp curr_wb = img_src.getWB(); + std::unique_ptr img_float = std::unique_ptr(new rtengine::Imagefloat(fw, fh)); + const PreviewProps pp(0, 0, fw, fh, 1); - procparams::ColorManagementParams icm; - icm.working = workingColorSpace; + rtengine::procparams::ColorManagementParams icm; + icm.working = working_color_space; - imgSrc.getImage (currWB, TR_NONE, baseImg, pp, procparams::ToneCurveParams(), icm, procparams::RAWParams()); + img_src.getImage(curr_wb, TR_NONE, img_float.get(), pp, rtengine::procparams::ToneCurveParams(), icm, rtengine::procparams::RAWParams()); - if ( !workingColorSpace.empty() ) { - imgSrc.convertColorSpace(baseImg, icm, currWB); + if (!working_color_space.empty()) { + img_src.convertColorSpace(img_float.get(), icm, curr_wb); } - result = baseImg; + AlignedBuffer image(fw * fh * 4 + 1); + + std::size_t index = 0; + + for (int y = 0; y < fh; ++y) { + for (int x = 0; x < fw; ++x) { + image.data[index] = img_float->r(y, x); + ++index; + image.data[index] = img_float->g(y, x); + ++index; + image.data[index] = img_float->b(y, x); + index += 2; + } + } + + clut_image.swap(image); + } + + return res; +} + +#ifdef __SSE2__ +vfloat getClutValue(const AlignedBuffer& clut_image, size_t index) +{ +#ifdef __SSE4_1__ + return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(*reinterpret_cast(clut_image.data + index))); +#else + return _mm_cvtpu16_ps(*reinterpret_cast(clut_image.data + index)); +#endif +} +#endif + +} + +rtengine::HaldCLUT::HaldCLUT() : + clut_level(0), + flevel_minus_one(0.0f), + flevel_minus_two(0.0f), + clut_profile("sRGB") +{ +} + +rtengine::HaldCLUT::~HaldCLUT() +{ +} + +bool rtengine::HaldCLUT::load(const Glib::ustring& filename) +{ + if (loadFile(filename, "", clut_image, clut_level)) { + Glib::ustring name, ext; + splitClutFilename(filename, name, ext, clut_profile); + + clut_filename = filename; + clut_level *= clut_level; + flevel_minus_one = static_cast(clut_level - 1) / 65535.0f; + flevel_minus_two = static_cast(clut_level - 2); + return true; + } + + return false; +} + +rtengine::HaldCLUT::operator bool() const +{ + return !clut_image.isEmpty(); +} + +Glib::ustring rtengine::HaldCLUT::getFilename() const +{ + return clut_filename; +} + +Glib::ustring rtengine::HaldCLUT::getProfile() const +{ + return clut_profile; +} + +void rtengine::HaldCLUT::getRGB( + float strength, + std::size_t line_size, + const float* r, + const float* g, + const float* b, + float* out_rgbx +) const +{ + const unsigned int level = clut_level; // This is important + + const unsigned int level_square = level * level; + +#ifdef __SSE2__ + const vfloat v_strength = F2V(strength); +#endif + + for (std::size_t column = 0; column < line_size; ++column, ++r, ++g, ++b, out_rgbx += 4) { + const unsigned int red = std::min(flevel_minus_two, *r * flevel_minus_one); + const unsigned int green = std::min(flevel_minus_two, *g * flevel_minus_one); + const unsigned int blue = std::min(flevel_minus_two, *b * flevel_minus_one); + + const unsigned int color = red + green * level + blue * level_square; + +#ifndef __SSE2__ + const float re = *r * flevel_minus_one - red; + const float gr = *g * flevel_minus_one - green; + const float bl = *b * flevel_minus_one - blue; + + size_t index = color * 4; + + float tmp1[4] ALIGNED16; + tmp1[0] = intp(re, clut_image.data[index + 4], clut_image.data[index]); + tmp1[1] = intp(re, clut_image.data[index + 5], clut_image.data[index + 1]); + tmp1[2] = intp(re, clut_image.data[index + 6], clut_image.data[index + 2]); + + index = (color + level) * 4; + + float tmp2[4] ALIGNED16; + tmp2[0] = intp(re, clut_image.data[index + 4], clut_image.data[index]); + tmp2[1] = intp(re, clut_image.data[index + 5], clut_image.data[index + 1]); + tmp2[2] = intp(re, clut_image.data[index + 6], clut_image.data[index + 2]); + + out_rgbx[0] = intp(gr, tmp2[0], tmp1[0]); + out_rgbx[1] = intp(gr, tmp2[1], tmp1[1]); + out_rgbx[2] = intp(gr, tmp2[2], tmp1[2]); + + index = (color + level_square) * 4; + + tmp1[0] = intp(re, clut_image.data[index + 4], clut_image.data[index]); + tmp1[1] = intp(re, clut_image.data[index + 5], clut_image.data[index + 1]); + tmp1[2] = intp(re, clut_image.data[index + 6], clut_image.data[index + 2]); + + index = (color + level + level_square) * 4; + + tmp2[0] = intp(re, clut_image.data[index + 4], clut_image.data[index]); + tmp2[1] = intp(re, clut_image.data[index + 5], clut_image.data[index + 1]); + tmp2[2] = intp(re, clut_image.data[index + 6], clut_image.data[index + 2]); + + tmp1[0] = intp(gr, tmp2[0], tmp1[0]); + tmp1[1] = intp(gr, tmp2[1], tmp1[1]); + tmp1[2] = intp(gr, tmp2[2], tmp1[2]); + + out_rgbx[0] = intp(bl, tmp1[0], out_rgbx[0]); + out_rgbx[1] = intp(bl, tmp1[1], out_rgbx[1]); + out_rgbx[2] = intp(bl, tmp1[2], out_rgbx[2]); + + out_rgbx[0] = intp(strength, out_rgbx[0], *r); + out_rgbx[1] = intp(strength, out_rgbx[1], *g); + out_rgbx[2] = intp(strength, out_rgbx[2], *b); +#else + const vfloat v_in = _mm_set_ps(0.0f, *b, *g, *r); + const vfloat v_tmp = v_in * _mm_load_ps1(&flevel_minus_one); + const vfloat v_rgb = v_tmp - _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_min_ps(_mm_load_ps1(&flevel_minus_two), v_tmp))); + + size_t index = color * 4; + + const vfloat v_r = PERMUTEPS(v_rgb, _MM_SHUFFLE(0, 0, 0, 0)); + + vfloat v_tmp1 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); + + index = (color + level) * 4; + + vfloat v_tmp2 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); + + const vfloat v_g = PERMUTEPS(v_rgb, _MM_SHUFFLE(1, 1, 1, 1)); + + vfloat v_out = vintpf(v_g, v_tmp2, v_tmp1); + + index = (color + level_square) * 4; + + v_tmp1 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); + + index = (color + level + level_square) * 4; + + v_tmp2 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); + + v_tmp1 = vintpf(v_g, v_tmp2, v_tmp1); + + const vfloat v_b = PERMUTEPS(v_rgb, _MM_SHUFFLE(2, 2, 2, 2)); + + v_out = vintpf(v_b, v_tmp1, v_out); + + STVF(*out_rgbx, vintpf(v_strength, v_out, v_in)); +#endif + } +} + +void rtengine::HaldCLUT::splitClutFilename( + const Glib::ustring& filename, + Glib::ustring& name, + Glib::ustring& extension, + Glib::ustring& profile_name +) +{ + Glib::ustring basename = Glib::path_get_basename(filename); + + Glib::ustring::size_type last_slash_pos = basename.rfind('/'); + + if (last_slash_pos == Glib::ustring::npos) { + last_slash_pos = basename.rfind('\\'); + } + + const Glib::ustring::size_type last_dot_pos = basename.rfind('.'); + + if (last_dot_pos != Glib::ustring::npos) { + name.assign(basename, 0, last_dot_pos); + extension.assign(basename, last_dot_pos + 1, Glib::ustring::npos); + } else { + name = basename; + } + + profile_name = "sRGB"; + + for (const auto& working_profile : rtengine::getWorkingProfiles()) { + if (std::search(name.rbegin(), name.rend(), working_profile.rbegin(), working_profile.rend()) == name.rbegin()) { + profile_name = working_profile; + name.erase(name.size() - working_profile.size()); + break; + } + } +} + +rtengine::CLUTStore& rtengine::CLUTStore::getInstance() +{ + static CLUTStore instance; + return instance; +} + +std::shared_ptr rtengine::CLUTStore::getClut(const Glib::ustring& filename) +{ + std::shared_ptr result; + + if (!cache.get(filename, result)) { + std::unique_ptr clut(new rtengine::HaldCLUT); + + if (clut->load(filename)) { + result = std::move(clut); + cache.insert(filename, result); + } } return result; } -void HaldCLUT::loadClut( Imagefloat *img, RawClut &outClut ) +void rtengine::CLUTStore::clearCache() { - img->normalizeFloatTo1(); - int y_size = img->getH(); - int x_size = img->getW(); - outClut.resize( x_size * y_size * 3 ); - int clutIdx = 0; - - //int level = m_level * m_level; (unused) - for(int y = 0; y < y_size; y++) { - for(int x = 0; x < x_size; x++) { - outClut[ clutIdx * 3 ] = img->r( y, x ) * MAXVAL8; - outClut[ clutIdx * 3 + 1 ] = img->g( y, x ) * MAXVAL8; - outClut[ clutIdx * 3 + 2 ] = img->b( y, x ) * MAXVAL8; - - ++clutIdx; - } - } + cache.clear(); } -Imagefloat* HaldCLUT::generateIdentImage( int level ) +rtengine::CLUTStore::CLUTStore() : + cache(options.clutCacheSize) { - int imageWidth = level * level * level; - Imagefloat *resultImg = new Imagefloat( imageWidth, imageWidth ); - - int cubeSideSize = level * level; - float step = MAXVALF / (cubeSideSize - 1); - int pos = 0; - - for( int b = 0; b < cubeSideSize; ++b ) { - for ( int g = 0; g < cubeSideSize; ++g ) { - for ( int r = 0; r < cubeSideSize; ++r ) { - int x = pos / imageWidth; - int y = pos % imageWidth; - resultImg->r( x, y ) = step * r; - resultImg->g( x, y ) = step * g; - resultImg->b( x, y ) = step * b; - ++pos; - } - } - } - - return resultImg; -} - - -bool HaldCLUT::isValid() const -{ - return m_clutImage != 0; -} - -void HaldCLUT::getRGB( float rr, float gg, float bb, float &outR, float &outG, float &outB ) const -{ - rr /= MAXVALF; - gg /= MAXVALF; - bb /= MAXVALF; - correct( *m_clutImage, m_level, rr, gg, bb, outR, outG, outB ); -} - -inline float valF( unsigned char val ) -{ - return float( val ) / MAXVAL8; -} - -// copypasted from http://www.quelsolaar.com/technology/clut.html -void HaldCLUT::correct( const HaldCLUT::RawClut& clut, int level, float rr, float gg, float bb, float &outR, float &outG, float &outB ) -{ - int color, red, green, blue, i, j; - float tmp[6], r, g, b; - level = level * level; - - red = rr * (float)(level - 1); - - if(red > level - 2) { - red = (float)level - 2; - } - - if(red < 0) { - red = 0; - } - - green = gg * (float)(level - 1); - - if(green > level - 2) { - green = (float)level - 2; - } - - if(green < 0) { - green = 0; - } - - blue = bb * (float)(level - 1); - - if(blue > level - 2) { - blue = (float)level - 2; - } - - if(blue < 0) { - blue = 0; - } - - r = rr * (float)(level - 1) - red; - g = gg * (float)(level - 1) - green; - b = bb * (float)(level - 1) - blue; - - color = red + green * level + blue * level * level; - - i = color * 3; - j = (color + 1) * 3; - - tmp[0] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[1] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[2] = valF( clut[i] ) * (1 - r) + valF( clut[j] ) * r; - - i = (color + level) * 3; - j = (color + level + 1) * 3; - - tmp[3] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[4] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[5] = valF( clut[i] ) * (1 - r) + valF( clut[j] ) * r; - - outR = tmp[0] * (1 - g) + tmp[3] * g; - outG = tmp[1] * (1 - g) + tmp[4] * g; - outB = tmp[2] * (1 - g) + tmp[5] * g; - - i = (color + level * level) * 3; - j = (color + level * level + 1) * 3; - - tmp[0] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[1] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[2] = valF( clut[i] ) * (1 - r) + valF( clut[j] ) * r; - - i = (color + level + level * level) * 3; - j = (color + level + level * level + 1) * 3; - - tmp[3] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[4] = valF( clut[i++] ) * (1 - r) + valF( clut[j++] ) * r; - tmp[5] = valF( clut[i] ) * (1 - r) + valF( clut[j] ) * r; - - tmp[0] = tmp[0] * (1 - g) + tmp[3] * g; - tmp[1] = tmp[1] * (1 - g) + tmp[4] * g; - tmp[2] = tmp[2] * (1 - g) + tmp[5] * g; - - outR = outR * (1 - b) + tmp[0] * b; - outG = outG * (1 - b) + tmp[1] * b; - outB = outB * (1 - b) + tmp[2] * b; -} - -inline void pos2xy( int pos, int imageSideSize, int &outX, int &outY ) -{ - outX = pos / imageSideSize; - outY = pos % imageSideSize; -} - -void HaldCLUT::correct( Imagefloat &clutImage, int level, float rr, float gg, float bb, float &outR, float &outG, float &outB ) -{ - int color, red, green, blue, i, j; - float tmp[6], r, g, b; - level = level * level; - int imageSideSize = clutImage.getW(); - - red = rr * (float)(level - 1); - - if(red > level - 2) { - red = (float)level - 2; - } - - if(red < 0) { - red = 0; - } - - green = gg * (float)(level - 1); - - if(green > level - 2) { - green = (float)level - 2; - } - - if(green < 0) { - green = 0; - } - - blue = bb * (float)(level - 1); - - if(blue > level - 2) { - blue = (float)level - 2; - } - - if(blue < 0) { - blue = 0; - } - - r = rr * (float)(level - 1) - red; - g = gg * (float)(level - 1) - green; - b = bb * (float)(level - 1) - blue; - - color = red + green * level + blue * level * level; - - - i = color; - j = color + 1; - int xi, yi, xj, yj; - pos2xy( i, imageSideSize, xi, yi ); - pos2xy( j, imageSideSize, xj, yj ); - - tmp[0] = clutImage.r( xi, yi ) * (1 - r) + clutImage.r( xj, yj ) * r; - tmp[1] = clutImage.g( xi, yi ) * (1 - r) + clutImage.g( xj, yj ) * r; - tmp[2] = clutImage.b( xi, yi ) * (1 - r) + clutImage.b( xj, yj ) * r; - - i = color + level; - j = color + level + 1; - pos2xy( i, imageSideSize, xi, yi ); - pos2xy( j, imageSideSize, xj, yj ); - - tmp[3] = clutImage.r( xi, yi ) * (1 - r) + clutImage.r( xj, yj ) * r; - tmp[4] = clutImage.g( xi, yi ) * (1 - r) + clutImage.g( xj, yj ) * r; - tmp[5] = clutImage.b( xi, yi ) * (1 - r) + clutImage.b( xj, yj ) * r; - - outR = tmp[0] * (1 - g) + tmp[3] * g; - outG = tmp[1] * (1 - g) + tmp[4] * g; - outB = tmp[2] * (1 - g) + tmp[5] * g; - - i = color + level * level; - j = color + level * level + 1; - pos2xy( i, imageSideSize, xi, yi ); - pos2xy( j, imageSideSize, xj, yj ); - - tmp[0] = clutImage.r( xi, yi ) * (1 - r) + clutImage.r( xj, yj ) * r; - tmp[1] = clutImage.g( xi, yi ) * (1 - r) + clutImage.g( xj, yj ) * r; - tmp[2] = clutImage.b( xi, yi ) * (1 - r) + clutImage.b( xj, yj ) * r; - - i = color + level + level * level; - j = color + level + level * level + 1; - pos2xy( i, imageSideSize, xi, yi ); - pos2xy( j, imageSideSize, xj, yj ); - - tmp[3] = clutImage.r( xi, yi ) * (1 - r) + clutImage.r( xj, yj ) * r; - tmp[4] = clutImage.g( xi, yi ) * (1 - r) + clutImage.g( xj, yj ) * r; - tmp[5] = clutImage.b( xi, yi ) * (1 - r) + clutImage.b( xj, yj ) * r; - - tmp[0] = tmp[0] * (1 - g) + tmp[3] * g; - tmp[1] = tmp[1] * (1 - g) + tmp[4] * g; - tmp[2] = tmp[2] * (1 - g) + tmp[5] * g; - - outR = outR * (1 - b) + tmp[0] * b; - outG = outG * (1 - b) + tmp[1] * b; - outB = outB * (1 - b) + tmp[2] * b; } diff --git a/rtengine/clutstore.h b/rtengine/clutstore.h index de080b737..7383b597f 100644 --- a/rtengine/clutstore.h +++ b/rtengine/clutstore.h @@ -1,107 +1,72 @@ -#ifndef CLUT_STORE_INCLUDED -#define CLUT_STORE_INCLUDED +#pragma once + +#include +#include #include -#include "../rtgui/threadutils.h" -#include "imagefloat.h" -#include -#include + +#include "cache.h" +#include "alignedbuffer.h" namespace rtengine { -// simple CLUT interface -class CLUT -{ -public: - virtual void getRGB( float r, float g, float b, float &outR, float &outG, float &outB ) const = 0; - virtual Glib::ustring profile() const = 0; -protected: - virtual ~CLUT() {}; -}; - -class HaldCLUT : public CLUT +class HaldCLUT { public: HaldCLUT(); + HaldCLUT(const HaldCLUT& other) = delete; + HaldCLUT& operator =(const HaldCLUT& other) = delete; ~HaldCLUT(); - void load( Glib::ustring filename ); - bool isValid() const; - void getRGB( float r, float g, float b, float &outR, float &outG, float &outB ) const; - Glib::ustring profile() const; + bool load(const Glib::ustring& filename); - typedef std::vector RawClut; // using 8 bit for reduce memory usage - static void correct( const RawClut&, int level, float r, float g, float b, float &outR, float &outG, float &outB ); - static void correct( Imagefloat &clutImage, int level, float rr, float gg, float bb, float &outR, float &outG, float &outB ); - static Imagefloat* generateIdentImage( int level ); - static Imagefloat* loadFile( Glib::ustring filename, Glib::ustring workingColorSpace, int &outLevel ); + explicit operator bool() const; + + Glib::ustring getFilename() const; + Glib::ustring getProfile() const; + + void getRGB( + float strength, + std::size_t line_size, + const float* r, + const float* g, + const float* b, + float* out_rgbx + ) const; + + static void splitClutFilename( + const Glib::ustring& filename, + Glib::ustring& name, + Glib::ustring& extension, + Glib::ustring& profile_name + ); private: - - void loadClut( Imagefloat *img, RawClut &outClut ); - - Imagefloat *m_clutImage; - int m_level; - Glib::ustring m_filename; - Glib::ustring m_profile; + AlignedBuffer clut_image; + unsigned int clut_level; + float flevel_minus_one; + float flevel_minus_two; + Glib::ustring clut_filename; + Glib::ustring clut_profile; }; -// CLUT cache class CLUTStore { public: - CLUTStore(); + static CLUTStore& getInstance(); - CLUT* getClut( const Glib::ustring& filename ); - void releaseClut( const CLUT* clut ); + CLUTStore(const CLUTStore& other) = delete; + CLUTStore& operator =(const CLUTStore& other) = delete; + + std::shared_ptr getClut(const Glib::ustring& filename); void clearCache(); private: - typedef std::map > Cluts; + CLUTStore(); - Cluts m_cluts; - MyMutex m_mutex; + Cache> cache; }; -void splitClutFilename( Glib::ustring filename, Glib::ustring &name, Glib::ustring &extension, Glib::ustring &profileName ); - -}; //namespace rtengine - -extern rtengine::CLUTStore clutStore; - -namespace rtengine -{ - -//support class for automate call of clutStore.releaseClut() -class ClutPtr -{ -public: - ClutPtr() : m_point( 0 ) {} - explicit ClutPtr(CLUT *p) : m_point( p ) {} - ~ClutPtr() - { - clutStore.releaseClut( m_point ); - } - const CLUT* operator-> () const - { - return m_point; - } - operator bool() const - { - return m_point != 0; - } - void set( CLUT *p ) - { - m_point = p; - } - -private: - ClutPtr& operator=(ClutPtr const& cp ); - CLUT *m_point; -}; - -}; //namespace rtengine - -#endif +} diff --git a/rtengine/color.cc b/rtengine/color.cc index f5a8c86a3..dc0710a2f 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -819,6 +819,15 @@ void Color::rgbxyz (float r, float g, float b, float &x, float &y, float &z, con z = ((xyz_rgb[2][0] * r + xyz_rgb[2][1] * g + xyz_rgb[2][2] * b)) ; } +#ifdef __SSE2__ +void Color::rgbxyz (vfloat r, vfloat g, vfloat b, vfloat &x, vfloat &y, vfloat &z, const vfloat xyz_rgb[3][3]) +{ + x = ((xyz_rgb[0][0] * r + xyz_rgb[0][1] * g + xyz_rgb[0][2] * b)) ; + y = ((xyz_rgb[1][0] * r + xyz_rgb[1][1] * g + xyz_rgb[1][2] * b)) ; + z = ((xyz_rgb[2][0] * r + xyz_rgb[2][1] * g + xyz_rgb[2][2] * b)) ; +} +#endif + void Color::xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const double rgb_xyz[3][3]) { //Transform to output color. Standard sRGB is D65, but internal representation is D50 diff --git a/rtengine/color.h b/rtengine/color.h index 3f78692d8..be7740e2a 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -325,7 +325,9 @@ public: */ static void rgbxyz (float r, float g, float b, float &x, float &y, float &z, const double xyz_rgb[3][3]); static void rgbxyz (float r, float g, float b, float &x, float &y, float &z, const float xyz_rgb[3][3]); - +#ifdef __SSE2__ + static void rgbxyz (vfloat r, vfloat g, vfloat b, vfloat &x, vfloat &y, vfloat &z, const vfloat xyz_rgb[3][3]); +#endif /** * @brief Convert Lab in xyz @@ -1077,6 +1079,10 @@ public: { return gammatab_srgb[x]; } + static inline float gamma_srgbclipped (float x) + { + return gamma2curve[x]; + } static inline float gamma (float x) { return gammatab[x]; diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 989d9c6a7..c1079c8b1 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -19,6 +19,9 @@ #include #include #include +#ifdef _OPENMP +#include +#endif #include "rtengine.h" #include "improcfun.h" @@ -38,9 +41,6 @@ #include "clutstore.h" #include "ciecam02.h" -#ifdef _OPENMP -#include -#endif #undef CLIPD #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f) @@ -3205,27 +3205,42 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } } - ClutPtr colorLUT; + std::shared_ptr hald_clut; bool clutAndWorkingProfilesAreSame = false; TMatrix work2xyz, xyz2clut, clut2xyz, xyz2work; +#ifdef __SSE2__ + vfloat v_work2xyz[3][3]; + vfloat v_xyz2clut[3][3]; + vfloat v_clut2xyz[3][3]; + vfloat v_xyz2work[3][3]; +#endif if ( params->filmSimulation.enabled && !params->filmSimulation.clutFilename.empty() ) { - colorLUT.set( clutStore.getClut( params->filmSimulation.clutFilename ) ); + hald_clut = CLUTStore::getInstance().getClut( params->filmSimulation.clutFilename ); - if ( colorLUT ) { - clutAndWorkingProfilesAreSame = colorLUT->profile() == params->icm.working; + if ( hald_clut ) { + clutAndWorkingProfilesAreSame = hald_clut->getProfile() == params->icm.working; if ( !clutAndWorkingProfilesAreSame ) { work2xyz = iccStore->workingSpaceMatrix( params->icm.working ); - xyz2clut = iccStore->workingSpaceInverseMatrix( colorLUT->profile() ); + xyz2clut = iccStore->workingSpaceInverseMatrix( hald_clut->getProfile() ); xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working ); - clut2xyz = iccStore->workingSpaceMatrix( colorLUT->profile() ); + clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() ); +#ifdef __SSE2__ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + v_work2xyz[i][j] = F2V(work2xyz[i][j]); + v_xyz2clut[i][j] = F2V(xyz2clut[i][j]); + v_xyz2work[i][j] = F2V(xyz2work[i][j]); + v_clut2xyz[i][j] = F2V(clut2xyz[i][j]); + } + } +#endif } } } - double filmSimCorrectedStrength = double(params->filmSimulation.strength) / 100.; - double filmSimSourceStrength = double(100 - params->filmSimulation.strength) / 100.; + const float film_simulation_strength = static_cast(params->filmSimulation.strength) / 100.0f; const float exp_scale = pow (2.0, expcomp); const float comp = (max(0.0, expcomp) + 1.0) * hlcompr / 100.0; @@ -4335,50 +4350,117 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } - //Film Simulations - if ( colorLUT ) { + // Film Simulations + if (hald_clut) { + float out_rgbx[4 * TS] ALIGNED16; + for (int i = istart, ti = 0; i < tH; i++, ti++) { + if (!clutAndWorkingProfilesAreSame) { + // Convert from working to clut profile +#ifdef __SSE2__ + if (!(std::min(TS, tW - jstart) & ~3)) { + for (int j = jstart, tj = 0; j < tW; j += 4, tj += 4) { + vfloat sourceR = LVF(rtemp[ti * TS + tj]); + vfloat sourceG = LVF(gtemp[ti * TS + tj]); + vfloat sourceB = LVF(btemp[ti * TS + tj]); + + vfloat x; + vfloat y; + vfloat z; + Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz); + Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut); + + STVF(rtemp[ti * TS + tj], sourceR); + STVF(gtemp[ti * TS + tj], sourceG); + STVF(btemp[ti * TS + tj], sourceB); + } + } + else +#endif + { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + float &sourceR = rtemp[ti * TS + tj]; + float &sourceG = gtemp[ti * TS + tj]; + float &sourceB = btemp[ti * TS + tj]; + + float x, y, z; + Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, work2xyz); + Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, xyz2clut); + } + } + } + for (int j = jstart, tj = 0; j < tW; j++, tj++) { float &sourceR = rtemp[ti * TS + tj]; float &sourceG = gtemp[ti * TS + tj]; float &sourceB = btemp[ti * TS + tj]; - if (!clutAndWorkingProfilesAreSame) { - //convert from working to clut profile - float x, y, z; - Color::rgbxyz( sourceR, sourceG, sourceB, x, y, z, work2xyz ); - Color::xyz2rgb( x, y, z, sourceR, sourceG, sourceB, xyz2clut ); + // Apply gamma sRGB (default RT) + sourceR = Color::gamma_srgbclipped(sourceR); + sourceG = Color::gamma_srgbclipped(sourceG); + sourceB = Color::gamma_srgbclipped(sourceB); + } + + const std::size_t line_offset = ti * TS; + hald_clut->getRGB( + film_simulation_strength, + std::min(TS, tW - jstart), + rtemp + line_offset, + gtemp + line_offset, + btemp + line_offset, + out_rgbx + ); + + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + float &sourceR = rtemp[ti * TS + tj]; + float &sourceG = gtemp[ti * TS + tj]; + float &sourceB = btemp[ti * TS + tj]; + + // Apply inverse gamma sRGB + sourceR = Color::igamma_srgb(out_rgbx[tj * 4 + 0]); + sourceG = Color::igamma_srgb(out_rgbx[tj * 4 + 1]); + sourceB = Color::igamma_srgb(out_rgbx[tj * 4 + 2]); + } + + if (!clutAndWorkingProfilesAreSame) { + // Convert from clut to working profile +#ifdef __SSE2__ + if (!(std::min(TS, tW - jstart) & ~3)) { + for (int j = jstart, tj = 0; j < tW; j += 4, tj += 4) { + vfloat sourceR = LVF(rtemp[ti * TS + tj]); + vfloat sourceG = LVF(gtemp[ti * TS + tj]); + vfloat sourceB = LVF(btemp[ti * TS + tj]); + + vfloat x; + vfloat y; + vfloat z; + Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz); + Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work); + + STVF(rtemp[ti * TS + tj], sourceR); + STVF(gtemp[ti * TS + tj], sourceG); + STVF(btemp[ti * TS + tj], sourceB); + } } + else +#endif + { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + float &sourceR = rtemp[ti * TS + tj]; + float &sourceG = gtemp[ti * TS + tj]; + float &sourceB = btemp[ti * TS + tj]; - //appply gamma sRGB (default RT) - sourceR = CLIP( Color::gamma_srgb( sourceR ) ); - sourceG = CLIP( Color::gamma_srgb( sourceG ) ); - sourceB = CLIP( Color::gamma_srgb( sourceB ) ); - - float r, g, b; - colorLUT->getRGB( sourceR, sourceG, sourceB, r, g, b ); - // apply strength - sourceR = r * filmSimCorrectedStrength + sourceR * filmSimSourceStrength; - sourceG = g * filmSimCorrectedStrength + sourceG * filmSimSourceStrength; - sourceB = b * filmSimCorrectedStrength + sourceB * filmSimSourceStrength; - // apply inverse gamma sRGB - sourceR = Color::igamma_srgb( sourceR ); - sourceG = Color::igamma_srgb( sourceG ); - sourceB = Color::igamma_srgb( sourceB ); - - if (!clutAndWorkingProfilesAreSame) { - //convert from clut to working profile - float x, y, z; - Color::rgbxyz( sourceR, sourceG, sourceB, x, y, z, clut2xyz ); - Color::xyz2rgb( x, y, z, sourceR, sourceG, sourceB, xyz2work ); + float x, y, z; + Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, clut2xyz); + Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, xyz2work); + } } - } } } - if(!blackwhite) { + if (!blackwhite) { // ready, fill lab for (int i = istart, ti = 0; i < tH; i++, ti++) { for (int j = jstart, tj = 0; j < tW; j++, tj++) { diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 5af070a24..1ec878c66 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -861,7 +861,7 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p // if clut was used and size of clut cache == 1 we free the memory used by the clutstore (default clut cache size = 1 for 32 bit OS) if ( params.filmSimulation.enabled && !params.filmSimulation.clutFilename.empty() && options.clutCacheSize == 1) { - clutStore.clearCache(); + CLUTStore::getInstance().clearCache(); } // freeing up some memory diff --git a/rtgui/filmsimulation.cc b/rtgui/filmsimulation.cc index 07f85df94..f916a5397 100644 --- a/rtgui/filmsimulation.cc +++ b/rtgui/filmsimulation.cc @@ -72,7 +72,7 @@ void FilmSimulation::onClutSelected() if ( getEnabled() && !currentClutFilename.empty() && listener && currentClutFilename != m_oldClutFilename ) { Glib::ustring clutName, dummy; - splitClutFilename( currentClutFilename, clutName, dummy, dummy ); + HaldCLUT::splitClutFilename( currentClutFilename, clutName, dummy, dummy ); listener->panelChanged( EvFilmSimulationFilename, clutName ); m_oldClutFilename = currentClutFilename; @@ -132,7 +132,7 @@ void FilmSimulation::read( const rtengine::procparams::ProcParams* pp, const Par if ( !get_inconsistent() && !pp->filmSimulation.enabled ) { if (options.clutCacheSize == 1) { - clutStore.clearCache(); + CLUTStore::getInstance().clearCache(); } } @@ -279,7 +279,7 @@ int ClutComboBox::parseDir (const Glib::ustring& path) for (const auto& entry : entries) { Glib::ustring name, extension, profileName; - splitClutFilename (entry, name, extension, profileName); + HaldCLUT::splitClutFilename (entry, name, extension, profileName); extension = extension.casefold (); if (extension.compare ("tif") != 0 && extension.compare ("png") != 0) { diff --git a/rtgui/preferences.cc b/rtgui/preferences.cc index ce461afec..dec4c9f6a 100644 --- a/rtgui/preferences.cc +++ b/rtgui/preferences.cc @@ -556,9 +556,9 @@ Gtk::Widget* Preferences::getPerformancePanel () clutCacheSizeSB->set_increments (1, 5); clutCacheSizeSB->set_max_length(2); // Will this be sufficient? :) #ifdef _OPENMP - clutCacheSizeSB->set_range (1, 2 * omp_get_num_procs()); + clutCacheSizeSB->set_range (1, 3 * omp_get_num_procs()); #else - clutCacheSizeSB->set_range (1, 8); + clutCacheSizeSB->set_range (1, 12); #endif clutCacheSizeHB->pack_start (*CLUTLl, Gtk::PACK_SHRINK, 0); clutCacheSizeHB->pack_end (*clutCacheSizeSB, Gtk::PACK_SHRINK, 0);