Moved findMinMaxPercentile() to rt_algo.*, use bool multiThread in fattal tonemapper, fixes #4195
This commit is contained in:
parent
dc248860a1
commit
0950963f84
@ -106,6 +106,7 @@ set(RTENGINESOURCEFILES
|
|||||||
rawimage.cc
|
rawimage.cc
|
||||||
rawimagesource.cc
|
rawimagesource.cc
|
||||||
refreshmap.cc
|
refreshmap.cc
|
||||||
|
rt_algo.cc
|
||||||
rtthumbnail.cc
|
rtthumbnail.cc
|
||||||
shmap.cc
|
shmap.cc
|
||||||
simpleprocess.cc
|
simpleprocess.cc
|
||||||
|
163
rtengine/rt_algo.cc
Normal file
163
rtengine/rt_algo.cc
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of RawTherapee.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Ingo Weyrich <heckflosse67@gmx.de>
|
||||||
|
*
|
||||||
|
* RawTherapee is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* RawTherapee is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cassert>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdint>
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace rtengine
|
||||||
|
{
|
||||||
|
void findMinMaxPercentile (const float *data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multithread)
|
||||||
|
{
|
||||||
|
// we need to find the (minPrct*size) smallest value and the (maxPrct*size) smallest value in data
|
||||||
|
// We use a histogram based search for speed and to reduce memory usage
|
||||||
|
// memory usage of this method is histoSize * sizeof(uint32_t) * (t + 1) byte,
|
||||||
|
// where t is the number of threads and histoSize is in [1;65536]
|
||||||
|
// The current implementation is not guaranteed to work correctly if size > 2^32 (4294967296)
|
||||||
|
assert (minPrct <= maxPrct);
|
||||||
|
|
||||||
|
if(size == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t numThreads = 1;
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// Because we have an overhead in the critical region of the main loop for each thread
|
||||||
|
// we make a rough calculation to reduce the number of threads for small data size
|
||||||
|
// This also works fine for the minmax loop
|
||||||
|
if(multithread) {
|
||||||
|
size_t maxThreads = omp_get_max_threads();
|
||||||
|
while (size > numThreads * numThreads * 16384 && numThreads < maxThreads) {
|
||||||
|
++numThreads;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// We need min and max value of data to calculate the scale factor for the histogram
|
||||||
|
float minVal = data[0];
|
||||||
|
float maxVal = data[0];
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for reduction(min:minVal) reduction(max:maxVal) num_threads(numThreads)
|
||||||
|
#endif
|
||||||
|
for (size_t i = 1; i < size; ++i) {
|
||||||
|
minVal = std::min(minVal, data[i]);
|
||||||
|
maxVal = std::max(maxVal, data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(std::fabs(maxVal - minVal) == 0.f) { // fast exit, also avoids division by zero in calculation of scale factor
|
||||||
|
minOut = maxOut = minVal;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// caution: currently this works correctly only for histoSize in range[1;65536]
|
||||||
|
// for small data size (i.e. thumbnails) we reduce the size of the histogram to the size of data
|
||||||
|
const unsigned int histoSize = std::min(static_cast<size_t>(65536), size);
|
||||||
|
|
||||||
|
// calculate scale factor to use full range of histogram
|
||||||
|
const float scale = (histoSize - 1) / (maxVal - minVal);
|
||||||
|
|
||||||
|
// We need one main histogram
|
||||||
|
std::vector<uint32_t> histo(histoSize, 0);
|
||||||
|
|
||||||
|
if(numThreads == 1) {
|
||||||
|
// just one thread => use main histogram
|
||||||
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
// we have to subtract minVal and multiply with scale to get the data in [0;histosize] range
|
||||||
|
histo[ (uint16_t) (scale * (data[i] - minVal))]++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel num_threads(numThreads)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
// We need one histogram per thread
|
||||||
|
std::vector<uint32_t> histothr(histoSize, 0);
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp for nowait
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
// we have to subtract minVal and multiply with scale to get the data in [0;histosize] range
|
||||||
|
histothr[ (uint16_t) (scale * (data[i] - minVal))]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp critical
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
// add per thread histogram to main histogram
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(size_t i = 0; i < histoSize; ++i) {
|
||||||
|
histo[i] += histothr[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t k = 0;
|
||||||
|
size_t count = 0;
|
||||||
|
|
||||||
|
// find (minPrct*size) smallest value
|
||||||
|
const float threshmin = minPrct * size;
|
||||||
|
while (count < threshmin) {
|
||||||
|
count += histo[k++];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (k > 0) { // interpolate
|
||||||
|
size_t count_ = count - histo[k - 1];
|
||||||
|
float c0 = count - threshmin;
|
||||||
|
float c1 = threshmin - count_;
|
||||||
|
minOut = (c1 * k + c0 * (k - 1)) / (c0 + c1);
|
||||||
|
} else {
|
||||||
|
minOut = k;
|
||||||
|
}
|
||||||
|
// go back to original range
|
||||||
|
minOut /= scale;
|
||||||
|
minOut += minVal;
|
||||||
|
|
||||||
|
// find (maxPrct*size) smallest value
|
||||||
|
const float threshmax = maxPrct * size;
|
||||||
|
while (count < threshmax) {
|
||||||
|
count += histo[k++];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (k > 0) { // interpolate
|
||||||
|
size_t count_ = count - histo[k - 1];
|
||||||
|
float c0 = count - threshmax;
|
||||||
|
float c1 = threshmax - count_;
|
||||||
|
maxOut = (c1 * k + c0 * (k - 1)) / (c0 + c1);
|
||||||
|
} else {
|
||||||
|
maxOut = k;
|
||||||
|
}
|
||||||
|
// go back to original range
|
||||||
|
maxOut /= scale;
|
||||||
|
maxOut += minVal;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
28
rtengine/rt_algo.h
Normal file
28
rtengine/rt_algo.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of RawTherapee.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Ingo Weyrich <heckflosse67@gmx.de>
|
||||||
|
*
|
||||||
|
* RawTherapee is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* RawTherapee is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
namespace rtengine
|
||||||
|
{
|
||||||
|
void findMinMaxPercentile (const float *data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multiThread = true);
|
||||||
|
|
||||||
|
}
|
@ -73,6 +73,8 @@
|
|||||||
#include "StopWatch.h"
|
#include "StopWatch.h"
|
||||||
#include "sleef.c"
|
#include "sleef.c"
|
||||||
#include "opthelper.h"
|
#include "opthelper.h"
|
||||||
|
#include "rt_algo.h"
|
||||||
|
|
||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -167,7 +169,7 @@ void downSample (const Array2Df& A, Array2Df& B)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gaussianBlur (const Array2Df& I, Array2Df& L)
|
void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread)
|
||||||
{
|
{
|
||||||
const int width = I.getCols();
|
const int width = I.getCols();
|
||||||
const int height = I.getRows();
|
const int height = I.getRows();
|
||||||
@ -185,7 +187,7 @@ void gaussianBlur (const Array2Df& I, Array2Df& L)
|
|||||||
Array2Df T (width, height);
|
Array2Df T (width, height);
|
||||||
|
|
||||||
//--- X blur
|
//--- X blur
|
||||||
#pragma omp parallel for shared(I, T)
|
#pragma omp parallel for shared(I, T) if(multithread)
|
||||||
|
|
||||||
for ( int y = 0 ; y < height ; y++ ) {
|
for ( int y = 0 ; y < height ; y++ ) {
|
||||||
for ( int x = 1 ; x < width - 1 ; x++ ) {
|
for ( int x = 1 ; x < width - 1 ; x++ ) {
|
||||||
@ -200,7 +202,7 @@ void gaussianBlur (const Array2Df& I, Array2Df& L)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//--- Y blur
|
//--- Y blur
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for ( int x = 0 ; x < width - 7 ; x += 8 ) {
|
for ( int x = 0 ; x < width - 7 ; x += 8 ) {
|
||||||
for ( int y = 1 ; y < height - 1 ; y++ ) {
|
for ( int y = 1 ; y < height - 1 ; y++ ) {
|
||||||
@ -231,7 +233,7 @@ void gaussianBlur (const Array2Df& I, Array2Df& L)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels)
|
void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels, bool multithread)
|
||||||
{
|
{
|
||||||
int width = H->getCols();
|
int width = H->getCols();
|
||||||
int height = H->getRows();
|
int height = H->getRows();
|
||||||
@ -245,7 +247,7 @@ void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels)
|
|||||||
}
|
}
|
||||||
|
|
||||||
Array2Df* L = new Array2Df (width, height);
|
Array2Df* L = new Array2Df (width, height);
|
||||||
gaussianBlur ( *pyramids[0], *L );
|
gaussianBlur ( *pyramids[0], *L, multithread );
|
||||||
|
|
||||||
for ( int k = 1 ; k < nlevels ; k++ ) {
|
for ( int k = 1 ; k < nlevels ; k++ ) {
|
||||||
if (width > 2 && height > 2) {
|
if (width > 2 && height > 2) {
|
||||||
@ -267,7 +269,7 @@ void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels)
|
|||||||
if (k < nlevels - 1) {
|
if (k < nlevels - 1) {
|
||||||
delete L;
|
delete L;
|
||||||
L = new Array2Df (width, height);
|
L = new Array2Df (width, height);
|
||||||
gaussianBlur ( *pyramids[k], *L );
|
gaussianBlur ( *pyramids[k], *L, multithread );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,14 +278,14 @@ void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels)
|
|||||||
|
|
||||||
//--------------------------------------------------------------------
|
//--------------------------------------------------------------------
|
||||||
|
|
||||||
float calculateGradients (Array2Df* H, Array2Df* G, int k)
|
float calculateGradients (Array2Df* H, Array2Df* G, int k, bool multithread)
|
||||||
{
|
{
|
||||||
const int width = H->getCols();
|
const int width = H->getCols();
|
||||||
const int height = H->getRows();
|
const int height = H->getRows();
|
||||||
const float divider = pow ( 2.0f, k + 1 );
|
const float divider = pow ( 2.0f, k + 1 );
|
||||||
float avgGrad = 0.0f;
|
float avgGrad = 0.0f;
|
||||||
|
|
||||||
#pragma omp parallel for reduction(+:avgGrad)
|
#pragma omp parallel for reduction(+:avgGrad) if(multithread)
|
||||||
|
|
||||||
for ( int y = 0 ; y < height ; y++ ) {
|
for ( int y = 0 ; y < height ; y++ ) {
|
||||||
int n = (y == 0 ? 0 : y - 1);
|
int n = (y == 0 ? 0 : y - 1);
|
||||||
@ -350,44 +352,34 @@ void upSample (const Array2Df& A, Array2Df& B)
|
|||||||
|
|
||||||
void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
|
void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
|
||||||
float avgGrad[], int nlevels, int detail_level,
|
float avgGrad[], int nlevels, int detail_level,
|
||||||
float alfa, float beta, float noise)
|
float alfa, float beta, float noise, bool multithread)
|
||||||
{
|
{
|
||||||
const bool newfattal = true;
|
|
||||||
int width = gradients[nlevels - 1]->getCols();
|
int width = gradients[nlevels - 1]->getCols();
|
||||||
int height = gradients[nlevels - 1]->getRows();
|
int height = gradients[nlevels - 1]->getRows();
|
||||||
Array2Df** fi = new Array2Df*[nlevels];
|
Array2Df** fi = new Array2Df*[nlevels];
|
||||||
|
|
||||||
fi[nlevels - 1] = new Array2Df (width, height);
|
fi[nlevels - 1] = new Array2Df (width, height);
|
||||||
|
|
||||||
if (newfattal) {
|
#pragma omp parallel for shared(fi) if(multithread)
|
||||||
//#pragma omp parallel for shared(fi)
|
|
||||||
for ( int k = 0 ; k < width * height ; k++ ) {
|
for ( int k = 0 ; k < width * height ; k++ ) {
|
||||||
(*fi[nlevels - 1]) (k) = 1.0f;
|
(*fi[nlevels - 1]) (k) = 1.0f;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for ( int k = nlevels - 1; k >= 0 ; k-- ) {
|
for ( int k = nlevels - 1; k >= 0 ; k-- ) {
|
||||||
width = gradients[k]->getCols();
|
width = gradients[k]->getCols();
|
||||||
height = gradients[k]->getRows();
|
height = gradients[k]->getRows();
|
||||||
|
|
||||||
// only apply gradients to levels>=detail_level but at least to the coarsest
|
// only apply gradients to levels>=detail_level but at least to the coarsest
|
||||||
if ( k >= detail_level
|
if ((k >= detail_level || k == nlevels - 1) && beta != 1.f) {
|
||||||
|| k == nlevels - 1
|
|
||||||
|| newfattal == false) {
|
|
||||||
//DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl;
|
//DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl;
|
||||||
#pragma omp parallel for shared(fi,avgGrad)
|
#pragma omp parallel for shared(fi,avgGrad) if(multithread)
|
||||||
for ( int y = 0; y < height; y++ ) {
|
for ( int y = 0; y < height; y++ ) {
|
||||||
for ( int x = 0; x < width; x++ ) {
|
for ( int x = 0; x < width; x++ ) {
|
||||||
float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y);
|
float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y);
|
||||||
float a = alfa * avgGrad[k];
|
float a = alfa * avgGrad[k];
|
||||||
|
|
||||||
float value = pow ((grad + noise) / a, beta - 1.0f);
|
float value = pow ((grad + noise) / a, beta - 1.0f);
|
||||||
|
|
||||||
if (newfattal) {
|
|
||||||
(*fi[k]) (x, y) *= value;
|
(*fi[k]) (x, y) *= value;
|
||||||
} else {
|
|
||||||
(*fi[k]) (x, y) = value;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -401,9 +393,9 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
|
|||||||
fi[0] = FI; // highest level -> result
|
fi[0] = FI; // highest level -> result
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( k > 0 && newfattal ) {
|
if (k > 0) {
|
||||||
upSample (*fi[k], *fi[k - 1]); // upsample to next level
|
upSample (*fi[k], *fi[k - 1]); // upsample to next level
|
||||||
gaussianBlur (*fi[k - 1], *fi[k - 1]);
|
gaussianBlur (*fi[k - 1], *fi[k - 1], multithread);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -414,80 +406,6 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
|
|||||||
delete[] fi;
|
delete[] fi;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
|
||||||
void findMaxMinPercentile (const Array2Df& I,
|
|
||||||
float minPrct, float& minLum,
|
|
||||||
float maxPrct, float& maxLum)
|
|
||||||
{
|
|
||||||
assert (minPrct <= maxPrct);
|
|
||||||
|
|
||||||
const int size = I.getRows() * I.getCols();
|
|
||||||
const float* data = I.data();
|
|
||||||
|
|
||||||
// we need to find the (minPrct*size) smallest value and the (maxPrct*size) smallest value in I
|
|
||||||
// We use a histogram based search for speed and to reduce memory usage
|
|
||||||
// memory usage of this method is 65536 * sizeof(float) * (t + 1) byte, where t is the number of threads
|
|
||||||
|
|
||||||
// We need one global histogram
|
|
||||||
LUTu histo (65536, LUT_CLIP_BELOW | LUT_CLIP_ABOVE);
|
|
||||||
histo.clear();
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp parallel
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
// We need one histogram per thread
|
|
||||||
LUTu histothr (65536, LUT_CLIP_BELOW | LUT_CLIP_ABOVE);
|
|
||||||
histothr.clear();
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp for nowait
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
// values are in [0;1] range, so we have to multiply with 65535 to get the histogram index
|
|
||||||
histothr[ (unsigned int) (65535.f * data[i])]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp critical
|
|
||||||
#endif
|
|
||||||
// add per thread histogram to global histogram
|
|
||||||
histo += histothr;
|
|
||||||
}
|
|
||||||
|
|
||||||
int k = 0;
|
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
// find (minPrct*size) smallest value
|
|
||||||
while (count < minPrct * size) {
|
|
||||||
count += histo[k++];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (k > 0) { // interpolate
|
|
||||||
int count_ = count - histo[k - 1];
|
|
||||||
float c0 = count - minPrct * size;
|
|
||||||
float c1 = minPrct * size - count_;
|
|
||||||
minLum = (c1 * k + c0 * (k - 1)) / ((c0 + c1) * 65535.f);
|
|
||||||
} else {
|
|
||||||
minLum = k / 65535.f;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find (maxPrct*size) smallest value
|
|
||||||
while (count < maxPrct * size) {
|
|
||||||
count += histo[k++];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (k > 0) { // interpolate
|
|
||||||
int count_ = count - histo[k - 1];
|
|
||||||
float c0 = count - maxPrct * size;
|
|
||||||
float c1 = maxPrct * size - count_;
|
|
||||||
maxLum = (c1 * k + c0 * (k - 1)) / ((c0 + c1) * 65535.f);
|
|
||||||
} else {
|
|
||||||
maxLum = k / 65535.f;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread);
|
void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread);
|
||||||
|
|
||||||
void tmo_fattal02 (size_t width,
|
void tmo_fattal02 (size_t width,
|
||||||
@ -543,7 +461,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
float minLum = Y (0, 0);
|
float minLum = Y (0, 0);
|
||||||
float maxLum = Y (0, 0);
|
float maxLum = Y (0, 0);
|
||||||
|
|
||||||
#pragma omp parallel for reduction(max:maxLum)
|
#pragma omp parallel for reduction(max:maxLum) if(multithread)
|
||||||
|
|
||||||
for ( int i = 0 ; i < size ; i++ ) {
|
for ( int i = 0 ; i < size ; i++ ) {
|
||||||
maxLum = std::max (maxLum, Y (i));
|
maxLum = std::max (maxLum, Y (i));
|
||||||
@ -552,7 +470,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
Array2Df* H = new Array2Df (width, height);
|
Array2Df* H = new Array2Df (width, height);
|
||||||
float temp = 100.f / maxLum;
|
float temp = 100.f / maxLum;
|
||||||
float eps = 1e-4f;
|
float eps = 1e-4f;
|
||||||
#pragma omp parallel
|
#pragma omp parallel if(multithread)
|
||||||
{
|
{
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
vfloat epsv = F2V (eps);
|
vfloat epsv = F2V (eps);
|
||||||
@ -627,7 +545,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
const int nlevels = 7; // RT -- see above
|
const int nlevels = 7; // RT -- see above
|
||||||
|
|
||||||
Array2Df** pyramids = new Array2Df*[nlevels];
|
Array2Df** pyramids = new Array2Df*[nlevels];
|
||||||
createGaussianPyramids (H, pyramids, nlevels);
|
createGaussianPyramids (H, pyramids, nlevels, multithread);
|
||||||
// ph.setValue(8);
|
// ph.setValue(8);
|
||||||
|
|
||||||
// calculate gradients and its average values on pyramid levels
|
// calculate gradients and its average values on pyramid levels
|
||||||
@ -636,7 +554,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
|
|
||||||
for ( int k = 0 ; k < nlevels ; k++ ) {
|
for ( int k = 0 ; k < nlevels ; k++ ) {
|
||||||
gradients[k] = new Array2Df (pyramids[k]->getCols(), pyramids[k]->getRows());
|
gradients[k] = new Array2Df (pyramids[k]->getCols(), pyramids[k]->getRows());
|
||||||
avgGrad[k] = calculateGradients (pyramids[k], gradients[k], k);
|
avgGrad[k] = calculateGradients (pyramids[k], gradients[k], k, multithread);
|
||||||
delete pyramids[k];
|
delete pyramids[k];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -645,7 +563,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
|
|
||||||
// calculate fi matrix
|
// calculate fi matrix
|
||||||
Array2Df* FI = new Array2Df (width, height);
|
Array2Df* FI = new Array2Df (width, height);
|
||||||
calculateFiMatrix (FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise);
|
calculateFiMatrix (FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise, multithread);
|
||||||
|
|
||||||
// dumpPFS( "FI.pfs", FI, "Y" );
|
// dumpPFS( "FI.pfs", FI, "Y" );
|
||||||
for ( int i = 0 ; i < nlevels ; i++ ) {
|
for ( int i = 0 ; i < nlevels ; i++ ) {
|
||||||
@ -684,7 +602,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
// side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
|
// side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
|
||||||
// Neumann conditions assume U(-1)=U(0)), see also divergence calculation
|
// Neumann conditions assume U(-1)=U(0)), see also divergence calculation
|
||||||
// if (fftsolver)
|
// if (fftsolver)
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for ( size_t y = 0 ; y < height ; y++ ) {
|
for ( size_t y = 0 ; y < height ; y++ ) {
|
||||||
// sets index+1 based on the boundary assumption H(N+1)=H(N-1)
|
// sets index+1 based on the boundary assumption H(N+1)=H(N-1)
|
||||||
@ -702,7 +620,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
delete H;
|
delete H;
|
||||||
|
|
||||||
// calculate divergence
|
// calculate divergence
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for ( size_t y = 0; y < height; ++y ) {
|
for ( size_t y = 0; y < height; ++y ) {
|
||||||
for ( size_t x = 0; x < width; ++x ) {
|
for ( size_t x = 0; x < width; ++x ) {
|
||||||
@ -754,7 +672,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
// {
|
// {
|
||||||
// return;
|
// return;
|
||||||
// }
|
// }
|
||||||
#pragma omp parallel
|
#pragma omp parallel if(multithread)
|
||||||
{
|
{
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
vfloat gammav = F2V (gamma);
|
vfloat gammav = F2V (gamma);
|
||||||
@ -783,10 +701,10 @@ void tmo_fattal02 (size_t width,
|
|||||||
float cut_min = 0.01f * black_point;
|
float cut_min = 0.01f * black_point;
|
||||||
float cut_max = 1.0f - 0.01f * white_point;
|
float cut_max = 1.0f - 0.01f * white_point;
|
||||||
assert (cut_min >= 0.0f && (cut_max <= 1.0f) && (cut_min < cut_max));
|
assert (cut_min >= 0.0f && (cut_max <= 1.0f) && (cut_min < cut_max));
|
||||||
findMaxMinPercentile (L, cut_min, minLum, cut_max, maxLum);
|
findMinMaxPercentile (L.data(), L.getRows() * L.getCols(), cut_min, minLum, cut_max, maxLum, multithread);
|
||||||
float dividor = (maxLum - minLum);
|
float dividor = (maxLum - minLum);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for (size_t i = 0; i < height; ++i) {
|
for (size_t i = 0; i < height; ++i) {
|
||||||
for (size_t j = 0; j < width; ++j) {
|
for (size_t j = 0; j < width; ++j) {
|
||||||
@ -869,7 +787,7 @@ void tmo_fattal02 (size_t width,
|
|||||||
|
|
||||||
// returns T = EVy A EVx^tr
|
// returns T = EVy A EVx^tr
|
||||||
// note, modifies input data
|
// note, modifies input data
|
||||||
void transform_ev2normal (Array2Df *A, Array2Df *T)
|
void transform_ev2normal (Array2Df *A, Array2Df *T, bool multithread)
|
||||||
{
|
{
|
||||||
int width = A->getCols();
|
int width = A->getCols();
|
||||||
int height = A->getRows();
|
int height = A->getRows();
|
||||||
@ -877,7 +795,7 @@ void transform_ev2normal (Array2Df *A, Array2Df *T)
|
|||||||
|
|
||||||
// the discrete cosine transform is not exactly the transform needed
|
// the discrete cosine transform is not exactly the transform needed
|
||||||
// need to scale input values to get the right transformation
|
// need to scale input values to get the right transformation
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for (int y = 1 ; y < height - 1 ; y++ )
|
for (int y = 1 ; y < height - 1 ; y++ )
|
||||||
for (int x = 1 ; x < width - 1 ; x++ ) {
|
for (int x = 1 ; x < width - 1 ; x++ ) {
|
||||||
@ -913,7 +831,7 @@ void transform_ev2normal (Array2Df *A, Array2Df *T)
|
|||||||
|
|
||||||
|
|
||||||
// returns T = EVy^-1 * A * (EVx^-1)^tr
|
// returns T = EVy^-1 * A * (EVx^-1)^tr
|
||||||
void transform_normal2ev (Array2Df *A, Array2Df *T)
|
void transform_normal2ev (Array2Df *A, Array2Df *T, bool multithread)
|
||||||
{
|
{
|
||||||
int width = A->getCols();
|
int width = A->getCols();
|
||||||
int height = A->getRows();
|
int height = A->getRows();
|
||||||
@ -928,7 +846,7 @@ void transform_normal2ev (Array2Df *A, Array2Df *T)
|
|||||||
|
|
||||||
// need to scale the output matrix to get the right transform
|
// need to scale the output matrix to get the right transform
|
||||||
float factor = (1.0f / ((height - 1) * (width - 1)));
|
float factor = (1.0f / ((height - 1) * (width - 1)));
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for (int y = 0 ; y < height ; y++ )
|
for (int y = 0 ; y < height ; y++ )
|
||||||
for (int x = 0 ; x < width ; x++ ) {
|
for (int x = 0 ; x < width ; x++ ) {
|
||||||
@ -1038,7 +956,7 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
|
|||||||
// transforms F into eigenvector space: Ftr =
|
// transforms F into eigenvector space: Ftr =
|
||||||
//DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl;
|
//DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl;
|
||||||
Array2Df* F_tr = buf; //new Array2Df(width,height);
|
Array2Df* F_tr = buf; //new Array2Df(width,height);
|
||||||
transform_normal2ev (F, F_tr);
|
transform_normal2ev (F, F_tr, multithread);
|
||||||
// TODO: F no longer needed so could release memory, but as it is an
|
// TODO: F no longer needed so could release memory, but as it is an
|
||||||
// input parameter we won't do that
|
// input parameter we won't do that
|
||||||
// ph.setValue(50);
|
// ph.setValue(50);
|
||||||
@ -1057,7 +975,7 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
|
|||||||
std::vector<double> l1 = get_lambda (height);
|
std::vector<double> l1 = get_lambda (height);
|
||||||
std::vector<double> l2 = get_lambda (width);
|
std::vector<double> l2 = get_lambda (width);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for (int y = 0 ; y < height ; y++ ) {
|
for (int y = 0 ; y < height ; y++ ) {
|
||||||
for (int x = 0 ; x < width ; x++ ) {
|
for (int x = 0 ; x < width ; x++ ) {
|
||||||
@ -1069,7 +987,7 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
|
|||||||
|
|
||||||
// transforms U_tr back to the normal space
|
// transforms U_tr back to the normal space
|
||||||
//DEBUG_STR << "solve_pde_fft: transform U_tr to normal space (fft)" << std::endl;
|
//DEBUG_STR << "solve_pde_fft: transform U_tr to normal space (fft)" << std::endl;
|
||||||
transform_ev2normal (F_tr, U);
|
transform_ev2normal (F_tr, U, multithread);
|
||||||
// delete F_tr; // no longer needed so release memory
|
// delete F_tr; // no longer needed so release memory
|
||||||
|
|
||||||
// the solution U as calculated will satisfy something like int U = 0
|
// the solution U as calculated will satisfy something like int U = 0
|
||||||
@ -1079,13 +997,13 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
|
|||||||
// (not really needed but good for numerics as we later take exp(U))
|
// (not really needed but good for numerics as we later take exp(U))
|
||||||
//DEBUG_STR << "solve_pde_fft: removing constant from solution" << std::endl;
|
//DEBUG_STR << "solve_pde_fft: removing constant from solution" << std::endl;
|
||||||
float max = 0.f;
|
float max = 0.f;
|
||||||
#pragma omp parallel for reduction(max:max)
|
#pragma omp parallel for reduction(max:max) if(multithread)
|
||||||
|
|
||||||
for (int i = 0; i < width * height; i++) {
|
for (int i = 0; i < width * height; i++) {
|
||||||
max = std::max (max, (*U) (i));
|
max = std::max (max, (*U) (i));
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for if(multithread)
|
||||||
|
|
||||||
for (int i = 0; i < width * height; i++) {
|
for (int i = 0; i < width * height; i++) {
|
||||||
(*U) (i) -= max;
|
(*U) (i) -= max;
|
||||||
@ -1335,8 +1253,6 @@ void ImProcFunctions::ToneMapFattal02 (Imagefloat *rgb)
|
|||||||
rescale_nearest (Yr, L, multiThread);
|
rescale_nearest (Yr, L, multiThread);
|
||||||
tmo_fattal02 (w2, h2, L, L, alpha, beta, noise, detail_level, multiThread);
|
tmo_fattal02 (w2, h2, L, L, alpha, beta, noise, detail_level, multiThread);
|
||||||
|
|
||||||
// tmo_fattal02(w, h, Yr, L, alpha, beta, noise, detail_level, multiThread);
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for if(multiThread)
|
#pragma omp parallel for if(multiThread)
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user