/*
* This file is part of RawTherapee.
*
* RawTherapee is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* RawTherapee is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with RawTherapee. If not, see .
*
* 2010 Ilya Popov
* 2012 Emil Martinec
* 2014 Ingo Weyrich
*/
#ifndef CPLX_WAVELET_LEVEL_H_INCLUDED
#define CPLX_WAVELET_LEVEL_H_INCLUDED
#include <cstddef>
#include <new>
#include "rt_math.h"
#include "opthelper.h"
#include "stdio.h"
namespace rtengine {
// One level of a 2-D wavelet decomposition. The level owns the coefficient
// planes referenced by wavcoeffs[1..3] (allocated by create(), released by
// destroy()) and provides the analysis (decompose_level) and synthesis
// (reconstruct_level) passes for that level.
// NOTE(review): the template parameter lists in this copy appear to have been
// stripped ("template" with no <...>). T is the coefficient type used below,
// E the element type of the src/dst buffers - confirm against upstream.
template
class wavelet_level
{
// level of decomposition
int lvl;
// whether to subsample the output (selects the FIR "Subsamp" filters instead of the Haar filters)
bool subsamp_out;
// thread count handed to the num_threads() clause of the nested OpenMP regions
int numThreads;
// spacing of filter taps
int skip;
// true while the three subbands share one allocation; create() clears it when
// that single allocation fails, destroy() uses it to pick the matching delete[]
bool bigBlockOfMemory;
// allocation and destruction of data storage
T ** create(int n);
void destroy(T ** subbands);
// filtering primitives: Haar filters are used when the output is not subsampled,
// the FIR "Subsamp" filters when it is; each exists for both directions
void AnalysisFilterHaarVertical (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int height, const int row);
void AnalysisFilterHaarHorizontal (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int row);
void SynthesisFilterHaarHorizontal (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height);
void SynthesisFilterHaarVertical (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height);
void AnalysisFilterSubsampHorizontal (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi,
const int taps, const int offset, const int srcwidth, const int dstwidth, const int row);
// the SSE2 build takes the vertical filters as arrays of 4 floats per tap
#ifdef __SSE2__
void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, T * dstHi, float (*filterLo)[4], float (*filterHi)[4],
const int taps, const int offset, const int width, const int height, const int row);
#else
void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi,
int const taps, const int offset, const int width, const int height, const int row);
#endif
void SynthesisFilterSubsampHorizontal (T * srcLo, T * srcHi, T * dst,
float *filterLo, float *filterHi, const int taps, const int offset, const int scrwidth, const int dstwidth, const int height);
#ifdef __SSE2__
void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float (*filterLo)[4], float (*filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend);
#else
void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend);
#endif
public:
// set by create() when one of the per-subband fallback allocations fails;
// checked before decomposing and before reconstructing
bool memoryAllocationFailed;
// table of 4 plane pointers; entries 1..3 are filled by create(), entry 0 is
// what lopass() returns and is not assigned anywhere in this file
T ** wavcoeffs;
// full size
int m_w, m_h;
// size of low frequency part
int m_w2, m_h2;
// Builds the level and, unless allocation failed, immediately runs
// decompose_level(src, dst, filterV, filterH, len, offset).
// subsamp is read as a per-level bit mask: bit 'level' decides whether this
// level's output is subsampled.
// NOTE(review): the initializer-list line below is truncated in this copy - the
// rest of the initializer list and the start of the constructor body (up to the
// end of a loop over n) are missing; restore them from upstream.
template
wavelet_level(E * src, E * dst, int level, int subsamp, int w, int h, float *filterV, float *filterH, int len, int offset, int skipcrop, int numThreads)
: lvl(level), subsamp_out((subsamp>>level)&1), numThreads(numThreads), skip(1<>n)&1);
}
// reduce the tap spacing by the crop factor, but never below 1
skip /= skipcrop;
if(skip < 1) skip=1;
}
// a subsampled level is half size in each direction, rounded up
m_w2 = (subsamp_out ? (w+1)/2 : w);
m_h2 = (subsamp_out ? (h+1)/2 : h);
wavcoeffs = create((m_w2)*(m_h2));
if(!memoryAllocationFailed)
decompose_level(src, dst, filterV, filterH, len, offset);
}
// releases the planes and the pointer table obtained from create()
~wavelet_level() {
destroy(wavcoeffs);
}
// table of plane pointers (same pointer as wavcoeffs)
T ** subbands() const {
return wavcoeffs;
}
// entry 0 of the table
T * lopass() const {
return wavcoeffs[0];
}
// width of this level's output planes
int width() const {
return m_w2;
}
// height of this level's output planes
int height() const {
return m_h2;
}
// spacing of filter taps used by this level
int stride() const {
return skip;
}
// true if the three subbands live in one shared allocation
bool bigBlockOfMemoryUsed() const {
return bigBlockOfMemory;
}
// analysis pass: filters src into this level's planes (dst is the low-pass output buffer - presumably, confirm against the definition)
template
void decompose_level(E *src, E *dst, float *filterV, float *filterH, int len, int offset);
// synthesis pass: rebuilds dst from src and wavcoeffs[1..3], using tmpLo/tmpHi as scratch; blend defaults to 1
template
void reconstruct_level(E* tmpLo, E* tmpHi, E *src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend = 1.f);
};
template
T ** wavelet_level::create(int n) {
T * data = new (std::nothrow) T[3*n];
if(data == NULL) {
bigBlockOfMemory = false;
}
T ** subbands = new T*[4];
for(int j = 1; j < 4; j++) {
if(bigBlockOfMemory)
subbands[j] = data + n * (j-1);
else {
subbands[j] = new (std::nothrow) T[n];
if(subbands[j] == NULL) {
printf("Couldn't allocate memory in level %d of wavelet\n",lvl);
memoryAllocationFailed = true;
}
}
}
return subbands;
}
template
void wavelet_level::destroy(T ** subbands) {
if(subbands) {
if(bigBlockOfMemory)
delete[] subbands[1];
else {
for(int j = 1; j < 4; j++) {
if(subbands[j] != NULL)
delete[] subbands[j];
}
}
delete[] subbands;
}
}
template
void wavelet_level::AnalysisFilterHaarHorizontal (const T * const RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, const int width, const int row) {
/* Basic convolution code
* Applies a Haar filter
*/
for(int i = 0; i < (width - skip); i++) {
dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i+skip]);
dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i+skip]);
}
for(int i = max(width-skip,skip); i < (width); i++) {
dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i-skip]);
dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i-skip]);
}
}
// Vertical Haar analysis for one row: writes a low and a high output for 'row'
// into dstLo / dstHi. Rows with row < height - skip take the first branch; a
// second branch is guarded by a comparison against max(height-skip, skip).
// NOTE(review): the template parameter list and both loop bodies are missing
// from this copy (text between '<' and '>' appears to have been stripped);
// restore this function from upstream before relying on it.
template void wavelet_level::AnalysisFilterHaarVertical (const T * const RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, const int width, const int height, const int row) {
/* Basic convolution code
* Applies a Haar filter
*/
// rows that still have a partner row further down
if(row < (height - skip)) {
for(int j=0;j=max(height-skip,skip)) {
// Horizontal Haar synthesis: combines the planes srcLo and srcHi into dst.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; this function's template header and its loop body
// are missing from this copy - restore from upstream.
for(int j=0;j void wavelet_level::SynthesisFilterHaarHorizontal (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) {
/* Basic convolution code
* Applies a Haar filter
*
*/
// the loop that follows is parallelised only when nested OpenMP is enabled
// and more than one thread was requested
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
#endif
// Vertical Haar synthesis: combines the planes srcLo and srcHi into dst.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; this function's template header and everything after
// the start of its first inner loop are missing from this copy - restore from
// upstream.
for (int k=0; k void wavelet_level::SynthesisFilterHaarVertical (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) {
/* Basic convolution code
* Applies a Haar filter
*
*/
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
#endif
{
// the first 'skip' rows are handled by a loop of their own; 'nowait' lets
// threads continue without a barrier after it
#ifdef _RT_NESTED_OPENMP
#pragma omp for nowait
#endif
for(int i = 0; i < skip; i++) {
for(int j=0;j
// Horizontal FIR analysis of one row with subsampling: filterLo / filterHi are
// the low- and high-pass coefficient arrays of length 'taps'.
// NOTE(review): the template header and everything after the start of the
// bulk/border test are missing from this copy - restore from upstream.
void wavelet_level::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float *RESTRICT filterHi,
const int taps, const int offset, const int srcwidth, const int dstwidth, const int row) {
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* Output is subsampled by two
*/
// calculate coefficients
// only every second source column is visited; lo and hi accumulate the two
// filter responses for that column
for(int i = 0; i < srcwidth; i+=2) {
float lo = 0.f, hi = 0.f;
// SSE variant of the vertical FIR analysis for one row: the filters are passed
// as arrays of 4 floats per tap.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; this function's template header and its body after
// the start of the bulk/border test are missing from this copy - restore from
// upstream.
if (LIKELY(i>skip*taps && i SSEFUNCTION void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4],
const int taps, const int offset, const int width, const int height, const int row) {
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* Output is subsampled by two
*/
// calculate coefficients
// Scalar variant of the vertical FIR analysis for one row: the filters are
// plain float arrays.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding (SSE) variant; this function's template header and its body
// after the start of the bulk/border test are missing from this copy - restore
// from upstream.
if (LIKELY(row>skip*taps && row void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float * RESTRICT filterHi,
const int taps, const int offset, const int width, const int height, const int row) {
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* Output is subsampled by two
*/
// calculate coefficients
// Horizontal FIR synthesis: combines the planes srcLo and srcHi (srcwidth
// columns) into dst (dstwidth columns).
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; this function's template header and its loop body
// are missing from this copy - restore from upstream.
if (LIKELY(row>skip*taps && row void wavelet_level::SynthesisFilterSubsampHorizontal (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int srcwidth, const int dstwidth, const int height) {
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* NOTE(review): the original text here said "Output is subsampled by two",
* the same wording as the analysis filters; for a synthesis step this is
* presumably "input is upsampled by two" - confirm.
*/
// calculate coefficients
int shift = skip*(taps-offset-1);//align filter with data
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
#endif
// SSE variant of the vertical FIR synthesis: combines srcLo and srcHi
// (srcheight rows) into dst (dstheight rows); the filters are passed as arrays
// of 4 floats per tap.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; this function's template header and the body of its
// row loop are missing from this copy - restore from upstream.
for (int k=0; k SSEFUNCTION void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend)
{
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* Destination row i is fed from source row (i+shift)/2, i.e. the source
* planes are the coarser (subsampled) ones
*/
// weight complementary to blend; how the two weights are applied is in the
// missing part of the loop - presumably to mix the existing dst with the
// filter result, confirm against upstream
const float srcFactor = 1.f - blend;
// calculate coefficients
int shift=skip*(taps-offset-1);//align filter with data
// constants broadcast to all four SSE lanes
__m128 fourv = _mm_set1_ps(4.f);
__m128 srcFactorv = _mm_set1_ps(srcFactor);
__m128 dstFactorv = _mm_set1_ps(blend);
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
#endif
for(int i = 0; i < dstheight; i++) {
// source row feeding this destination row, and the parity of the shifted index
int i_src = (i+shift)/2;
int begin = (i+shift)%2;
//TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip'
if (LIKELY(i>skip*taps && i<(dstheight-skip*taps))) {//bulk
int k;
// Scalar variant of the vertical FIR synthesis: combines srcLo and srcHi
// (srcheight rows) into dst (dstheight rows); the filters are plain float
// arrays.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding (SSE) variant; this function's template header and the body of
// its row loop are missing from this copy - restore from upstream.
for (k=0; k void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend)
{
/* Basic convolution code
* Applies an FIR filter 'filter' with filter length 'taps',
* aligning the 'offset' element of the filter with
* the input pixel, and skipping 'skip' pixels between taps
* Destination row i is fed from source row (i+shift)/2, i.e. the source
* planes are the coarser (subsampled) ones
*/
// weight complementary to blend; its use is in the missing part of the loop
const float srcFactor = 1.f - blend;
// calculate coefficients
int shift=skip*(taps-offset-1);//align filter with data
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
#endif
for(int i = 0; i < dstheight; i++) {
// source row feeding this destination row, and the parity of the shifted index
int i_src = (i+shift)/2;
int begin = (i+shift)%2;
//TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip'
if (LIKELY(i>skip*taps && i<(dstheight-skip*taps))) {//bulk
// SSE variant of the analysis pass for this level.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function; the template headers, the body of the row loop and
// the preprocessor guard that selects this variant are missing from this copy
// - restore from upstream.
for (int k=0; k template SSEFUNCTION void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) {
/* filter along rows and columns */
// every one of the 2*taps vertical coefficients is replicated into 4 floats,
// the layout expected by the SSE vertical filter; only needed when subsampling
float filterVarray[2*taps][4] ALIGNED64;
if(subsamp_out) {
for(int i=0;i<2*taps;i++) {
for(int j=0;j<4;j++) {
filterVarray[i][j] = filterV[i];
}
}
}
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
#endif
{
// row buffers of m_w elements, declared inside the parallel block so each
// thread has its own pair
T tmpLo[m_w] ALIGNED64;
T tmpHi[m_w] ALIGNED64;
if(subsamp_out) {
#ifdef _RT_NESTED_OPENMP
#pragma omp for
#endif
// Scalar variant of the analysis pass for this level.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding (SSE) variant; the template headers and the body of the row
// loop are missing from this copy - restore from upstream.
for(int row=0;row template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) {
#ifdef _RT_NESTED_OPENMP
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
#endif
{
// row buffers of m_w elements, declared inside the parallel block so each
// thread has its own pair
T tmpLo[m_w] ALIGNED64;
T tmpHi[m_w] ALIGNED64;
/* filter along rows and columns */
if(subsamp_out) {
#ifdef _RT_NESTED_OPENMP
#pragma omp for
#endif
// SSE variant of the synthesis pass: rebuilds dst from the plane src and
// wavcoeffs[1..3], using tmpLo / tmpHi as intermediate buffers. Does nothing
// when memoryAllocationFailed is set.
// NOTE(review): the fragment at the start of the next line is the remains of
// the preceding function, and this function's template headers are missing
// from this copy; the body below is complete.
for(int row=0;row template SSEFUNCTION void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) {
if(memoryAllocationFailed)
return;
/* filter along rows and columns */
if (subsamp_out) {
// replicate each of the 2*taps vertical coefficients into 4 floats, the
// layout expected by the SSE vertical filter
float filterVarray[2*taps][4] ALIGNED64;
for(int i=0;i<2*taps;i++) {
for(int j=0;j<4;j++) {
filterVarray[i][j] = filterV[i];
}
}
// horizontal pass: the second half of filterH (from filterH+taps) is the high-pass filter
SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2);
SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2);
// vertical pass merges the two intermediate planes; blend is only used here
SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterVarray, filterVarray+taps, taps, offset, m_w, m_h2, m_h, blend);
} else {
// non-subsampled level: Haar synthesis, which takes no filter arrays and no blend
SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2);
SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2);
SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h);
}
}
#else
template template void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) {
if(memoryAllocationFailed)
return;
/* filter along rows and columns */
if (subsamp_out) {
SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2);
SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2);
SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterV, filterV+taps, taps, offset, m_w, m_h2, m_h, blend);
} else {
SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2);
SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2);
SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h);
}
}
#endif
};
#endif