diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index d9b6c1ce7..f1d331acd 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -1764,7 +1764,7 @@ SSEFUNCTION void ImProcFunctions::RGBtile_denoise (float * fLblox, int hblproc, __m128 noisevar_Ldetailv = _mm_set1_ps( noisevar_Ldetail ); __m128 onev = _mm_set1_ps( 1.0f ); for (int n=0; n SSEFUNCTION void boxabsblur (T* src, A* dst, int radx __m128 tempv,lenv,lenp1v,lenm1v,rlenv; for (int col = 0; col < W-3; col+=4) { lenv = leninitv; - tempv = LVF(temp[0*W+col]); + tempv = LVFU(temp[0*W+col]); for (int i=1; i<=rady; i++) { - tempv = tempv + LVF(temp[i*W+col]); + tempv = tempv + LVFU(temp[i*W+col]); } tempv = tempv / lenv; - _mm_store_ps( &dst[0*W+col], tempv ); + _mm_storeu_ps( &dst[0*W+col], tempv ); for (int row=1; row<=rady; row++) { lenp1v = lenv + onev; - tempv = (tempv*lenv + LVF(temp[(row+rady)*W+col]))/lenp1v; - _mm_store_ps( &dst[row*W+col],tempv); + tempv = (tempv*lenv + LVFU(temp[(row+rady)*W+col]))/lenp1v; + _mm_storeu_ps( &dst[row*W+col],tempv); lenv = lenp1v; } rlenv = onev / lenv; for (int row = rady+1; row < H-rady; row++) { - tempv = tempv + (LVF(temp[(row+rady)*W+col])- LVF(temp[(row-rady-1)*W+col]))*rlenv; - _mm_store_ps( &dst[row*W+col], tempv); + tempv = tempv + (LVFU(temp[(row+rady)*W+col])- LVFU(temp[(row-rady-1)*W+col]))*rlenv; + _mm_storeu_ps( &dst[row*W+col], tempv); } for (int row=H-rady; row