/* * This file is part of RawTherapee. * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * RawTherapee is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see <http://www.gnu.org/licenses/>. * * 2010 Ilya Popov * 2012 Emil Martinec * 2014 Ingo Weyrich */ #ifndef CPLX_WAVELET_LEVEL_H_INCLUDED #define CPLX_WAVELET_LEVEL_H_INCLUDED #include #include "rt_math.h" #include "opthelper.h" #include "stdio.h" namespace rtengine { /* wavelet_level: one level of a wavelet decomposition. It stores the level index (lvl), the tap spacing (skip), the subsampling flag (subsamp_out) and the coefficient storage obtained from create(), and declares the Haar and FIR analysis/synthesis filter passes used by decompose_level() / reconstruct_level(). NOTE(review): this file appears to have lost its line breaks and every '<...>' span (the template parameter list of this class, the target of the first #include, loop bodies further down) - confirm against the upstream file before building. */ template class wavelet_level { // level of decomposition int lvl; // whether to subsample the output bool subsamp_out; int numThreads; // spacing of filter taps int skip; bool bigBlockOfMemory; // allocation and destruction of data storage T ** create(int n); void destroy(T ** subbands); // load a row/column of input data, possibly with padding void AnalysisFilterHaarVertical (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int height, const int row); void AnalysisFilterHaarHorizontal (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int row); void SynthesisFilterHaarHorizontal (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height); void SynthesisFilterHaarVertical (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height); void AnalysisFilterSubsampHorizontal (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi, const int taps, const int offset, const int srcwidth, const int dstwidth, const int row); #ifdef __SSE2__ void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, 
T * dstHi, float (*filterLo)[4], float (*filterHi)[4], const int taps, const int offset, const int width, const int height, const int row); #else void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi, int const taps, const int offset, const int width, const int height, const int row); #endif void SynthesisFilterSubsampHorizontal (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, const int taps, const int offset, const int scrwidth, const int dstwidth, const int height); #ifdef __SSE2__ void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float (*filterLo)[4], float (*filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend); #else void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend); #endif public: /* memoryAllocationFailed is set to true by create() when a per-subband allocation fails; wavcoeffs is the 4-entry pointer array returned by create(), whose entries 1..3 point at the allocated subbands. */ bool memoryAllocationFailed; T ** wavcoeffs; // full size int m_w, m_h; // size of low frequency part int m_w2, m_h2; /* Constructor: records level, subsampling flag ((subsamp>>level)&1) and thread count, divides skip by skipcrop (clamped to at least 1), sizes the low-frequency part as ((w+1)/2) x ((h+1)/2) when subsamp_out is set and w x h otherwise, allocates the coefficient storage with create(m_w2*m_h2) and, unless allocation failed, runs decompose_level(). NOTE(review): the template header, the end of the member-initializer list and the start of the body are missing in this copy ("skip(1<>n)&1); }") - restore them from the upstream file. */ template wavelet_level(E * src, E * dst, int level, int subsamp, int w, int h, float *filterV, float *filterH, int len, int offset, int skipcrop, int numThreads) : lvl(level), subsamp_out((subsamp>>level)&1), numThreads(numThreads), skip(1<>n)&1); } skip /= skipcrop; if(skip < 1) skip=1; } m_w2 = (subsamp_out ? (w+1)/2 : w); m_h2 = (subsamp_out ? 
(h+1)/2 : h); wavcoeffs = create((m_w2)*(m_h2)); if(!memoryAllocationFailed) decompose_level(src, dst, filterV, filterH, len, offset); } ~wavelet_level() { destroy(wavcoeffs); } T ** subbands() const { return wavcoeffs; } T * lopass() const { return wavcoeffs[0]; } int width() const { return m_w2; } int height() const { return m_h2; } int stride() const { return skip; } bool bigBlockOfMemoryUsed() const { return bigBlockOfMemory; } template void decompose_level(E *src, E *dst, float *filterV, float *filterH, int len, int offset); template void reconstruct_level(E* tmpLo, E* tmpHi, E *src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend = 1.f); }; template T ** wavelet_level::create(int n) { T * data = new (std::nothrow) T[3*n]; if(data == NULL) { bigBlockOfMemory = false; } T ** subbands = new T*[4]; for(int j = 1; j < 4; j++) { if(bigBlockOfMemory) subbands[j] = data + n * (j-1); else { subbands[j] = new (std::nothrow) T[n]; if(subbands[j] == NULL) { printf("Couldn't allocate memory in level %d of wavelet\n",lvl); memoryAllocationFailed = true; } } } return subbands; } template void wavelet_level::destroy(T ** subbands) { if(subbands) { if(bigBlockOfMemory) delete[] subbands[1]; else { for(int j = 1; j < 4; j++) { if(subbands[j] != NULL) delete[] subbands[j]; } } delete[] subbands; } } template void wavelet_level::AnalysisFilterHaarHorizontal (const T * const RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, const int width, const int row) { /* Basic convolution code * Applies a Haar filter */ for(int i = 0; i < (width - skip); i++) { dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i+skip]); dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i+skip]); } for(int i = max(width-skip,skip); i < (width); i++) { dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i-skip]); dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i-skip]); } } template void wavelet_level::AnalysisFilterHaarVertical (const T * const RESTRICT srcbuffer, T * RESTRICT 
dstLo, T * RESTRICT dstHi, const int width, const int height, const int row) { /* Basic convolution code * Applies a Haar filter * One branch handles rows with row < height-skip, the other rows at or beyond max(height-skip, skip). * NOTE(review): the bodies of both branches are missing in this copy (the text after "for(int j=0;j" was dropped up to the next '>') - restore them from the upstream file. */ if(row < (height - skip)) { for(int j=0;j=max(height-skip,skip)) { for(int j=0;j void wavelet_level::SynthesisFilterHaarHorizontal (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) { /* Basic convolution code * Applies a Haar filter * The outer loop over k is parallelised with OpenMP when _RT_NESTED_OPENMP is defined and numThreads > 1. * NOTE(review): the loop bound and body are missing in this copy, as is the template header that preceded this definition - restore them from the upstream file. * */ #ifdef _RT_NESTED_OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif for (int k=0; k void wavelet_level::SynthesisFilterHaarVertical (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) { /* Basic convolution code * Applies a Haar filter * The first 'skip' rows are handled by a separate loop (omp for nowait) inside one OpenMP parallel region. * NOTE(review): the inner loop body and everything after it are missing in this copy - restore them from the upstream file. * */ #ifdef _RT_NESTED_OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { #ifdef _RT_NESTED_OPENMP #pragma omp for nowait #endif for(int i = 0; i < skip; i++) { for(int j=0;j void wavelet_level::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float *RESTRICT filterHi, const int taps, const int offset, const int srcwidth, const int dstwidth, const int row) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two (the loop below advances the source index i in steps of 2) * NOTE(review): the bulk/boundary branches of the loop body are missing in this copy - restore them from the upstream file. */ // calculate coefficients for(int i = 0; i < srcwidth; i+=2) { float lo = 0.f, hi = 0.f; if (LIKELY(i>skip*taps && i SSEFUNCTION void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int height, const int row) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the 
input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two * NOTE(review): this is the variant taking float (*)[4] filter arrays; its body is missing in this copy after the bulk-row test - restore it from the upstream file. */ // calculate coefficients if (LIKELY(row>skip*taps && row void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int width, const int height, const int row) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two * NOTE(review): variant taking plain float* filters; its body is missing in this copy after the bulk-row test, as is the template header that preceded this definition - restore them from the upstream file. */ // calculate coefficients if (LIKELY(row>skip*taps && row void wavelet_level::SynthesisFilterSubsampHorizontal (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int srcwidth, const int dstwidth, const int height) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two * NOTE(review): the line above looks copied from the analysis filters; reconstruct_level() calls this function with srcwidth = m_w2 and dstwidth = m_w, which suggests the input is the subsampled side - confirm. * NOTE(review): the loop bound and body are missing in this copy - restore them from the upstream file. */ // calculate coefficients int shift = skip*(taps-offset-1);//align filter with data #ifdef _RT_NESTED_OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif for (int k=0; k SSEFUNCTION void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two * NOTE(review): the line above looks copied from the analysis filters; the loop runs over dstheight output rows and derives the source row as (i+shift)/2 - confirm the intended wording. * srcFactor = 1 - blend and blend are loaded as the source and destination weights. */ const float srcFactor = 1.f - blend; // calculate coefficients int 
shift=skip*(taps-offset-1);//align filter with data __m128 fourv = _mm_set1_ps(4.f); __m128 srcFactorv = _mm_set1_ps(srcFactor); __m128 dstFactorv = _mm_set1_ps(blend); #ifdef _RT_NESTED_OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif for(int i = 0; i < dstheight; i++) { int i_src = (i+shift)/2; int begin = (i+shift)%2; //TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip' if (LIKELY(i>skip*taps && i<(dstheight-skip*taps))) {//bulk int k; for (k=0; k void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with * the input pixel, and skipping 'skip' pixels between taps * Output is subsampled by two * NOTE(review): the line above looks copied from the analysis filters; the loop below runs over dstheight output rows and reads source row i_src = (i+shift)/2, starting at filter phase begin = (i+shift)%2 - confirm the intended wording. * Rows with skip*taps < i < dstheight-skip*taps take the bulk branch. * NOTE(review): the body of the bulk loop and the boundary branch are missing in this copy - restore them from the upstream file. */ const float srcFactor = 1.f - blend; // calculate coefficients int shift=skip*(taps-offset-1);//align filter with data #ifdef _RT_NESTED_OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif for(int i = 0; i < dstheight; i++) { int i_src = (i+shift)/2; int begin = (i+shift)%2; //TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip' if (LIKELY(i>skip*taps && i<(dstheight-skip*taps))) {//bulk for (int k=0; k template SSEFUNCTION void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { /* filter along rows and columns * When subsamp_out is set, each of the 2*taps entries of filterV is first replicated four times into filterVarray. * tmpLo / tmpHi are per-thread row buffers of m_w elements declared inside the parallel region. * NOTE(review): filterVarray, tmpLo and tmpHi are variable-length arrays (a compiler extension in C++) - confirm the build targets support them and that m_w stays small enough for automatic storage. */ float filterVarray[2*taps][4] ALIGNED64; if(subsamp_out) { for(int i=0;i<2*taps;i++) { for(int j=0;j<4;j++) { filterVarray[i][j] = filterV[i]; } } } #ifdef _RT_NESTED_OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { T tmpLo[m_w] ALIGNED64; T tmpHi[m_w] ALIGNED64; if(subsamp_out) { #ifdef 
_RT_NESTED_OPENMP #pragma omp for #endif for(int row=0;row template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { #ifdef _RT_NESTED_OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { T tmpLo[m_w] ALIGNED64; T tmpHi[m_w] ALIGNED64; /* filter along rows and columns * tmpLo / tmpHi are per-thread row buffers of m_w elements (variable-length arrays, a compiler extension in C++). * NOTE(review): the row loop body and the remainder of this function are missing in this copy - restore them from the upstream file. */ if(subsamp_out) { #ifdef _RT_NESTED_OPENMP #pragma omp for #endif for(int row=0;row template SSEFUNCTION void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) { if(memoryAllocationFailed) return; /* filter along rows and columns * Returns immediately (dst untouched) when memoryAllocationFailed is set. * Horizontal synthesis combines (wavcoeffs[2], wavcoeffs[3]) into tmpHi and (src, wavcoeffs[1]) into tmpLo; vertical synthesis then combines (tmpLo, tmpHi) into dst. * In the subsampled path each of the 2*taps entries of filterV is first replicated four times into filterVarray for the float (*)[4] vertical filter, and blend is forwarded to it; the Haar path takes no blend argument. */ if (subsamp_out) { float filterVarray[2*taps][4] ALIGNED64; for(int i=0;i<2*taps;i++) { for(int j=0;j<4;j++) { filterVarray[i][j] = filterV[i]; } } SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterVarray, filterVarray+taps, taps, offset, m_w, m_h2, m_h, blend); } else { SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2); SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2); SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h); } } #else template template void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) { if(memoryAllocationFailed) return; /* filter along rows and columns * Same data flow as the variant above, but filterV / filterV+taps are passed to the vertical filter directly. * Returns immediately (dst untouched) when memoryAllocationFailed is set. */ if (subsamp_out) { SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterV, filterV+taps, taps, 
offset, m_w, m_h2, m_h, blend); } else { /* non-subsampled path: Haar synthesis, horizontal into tmpHi / tmpLo, then vertical into dst (blend is not used here) */ SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2); SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2); SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h); } } #endif }; /* end of namespace rtengine */ #endif /* CPLX_WAVELET_LEVEL_H_INCLUDED */