From 0120bcd836cccc4c74faf20a42a9a4fed048b296 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 1 Nov 2015 13:41:21 +0100 Subject: [PATCH] Speedup for boxblur --- rtengine/boxblur.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index dbfc0ca3a..9fe828131 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -136,7 +136,6 @@ template void boxblurnew (T** src, A** dst, T* buffer, int rad //box blur image; box range = (radx,rady) float* temp = buffer; - if (radx == 0) { #ifdef _OPENMP #pragma omp for @@ -186,12 +185,41 @@ template void boxblurnew (T** src, A** dst, T* buffer, int rad dst[row][col] = temp[row * W + col]; } } else { + const int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache //vertical blur #ifdef _OPENMP #pragma omp for #endif - for (int col = 0; col < W; col++) { + for (int col = 0; col < W-numCols+1; col+=8) { + int len = rady + 1; + for(int k=0;k