Reverted some aligned memory accesses to unaligned ones, because the aligned versions seem not to work correctly on some systems

This commit is contained in:
Ingo
2014-12-30 11:36:26 +01:00
parent 07e5213b3d
commit cb54a15b58
2 changed files with 10 additions and 10 deletions

View File

@@ -1764,7 +1764,7 @@ SSEFUNCTION void ImProcFunctions::RGBtile_denoise (float * fLblox, int hblproc,
__m128 noisevar_Ldetailv = _mm_set1_ps( noisevar_Ldetail );
__m128 onev = _mm_set1_ps( 1.0f );
for (int n=0; n<TS*TS; n+=4) { //for DCT
tempv = onev - xexpf( -SQRV( LVF(nbrwt[n]))/noisevar_Ldetailv);
tempv = onev - xexpf( -SQRV( LVFU(nbrwt[n]))/noisevar_Ldetailv);
_mm_storeu_ps( &fLblox[blkstart+n], LVFU(fLblox[blkstart+n]) * tempv );
}//output neighbor averaged result
#else

View File

@@ -745,27 +745,27 @@ template<class T, class A> SSEFUNCTION void boxabsblur (T* src, A* dst, int radx
__m128 tempv,lenv,lenp1v,lenm1v,rlenv;
for (int col = 0; col < W-3; col+=4) {
lenv = leninitv;
tempv = LVF(temp[0*W+col]);
tempv = LVFU(temp[0*W+col]);
for (int i=1; i<=rady; i++) {
tempv = tempv + LVF(temp[i*W+col]);
tempv = tempv + LVFU(temp[i*W+col]);
}
tempv = tempv / lenv;
_mm_store_ps( &dst[0*W+col], tempv );
_mm_storeu_ps( &dst[0*W+col], tempv );
for (int row=1; row<=rady; row++) {
lenp1v = lenv + onev;
tempv = (tempv*lenv + LVF(temp[(row+rady)*W+col]))/lenp1v;
_mm_store_ps( &dst[row*W+col],tempv);
tempv = (tempv*lenv + LVFU(temp[(row+rady)*W+col]))/lenp1v;
_mm_storeu_ps( &dst[row*W+col],tempv);
lenv = lenp1v;
}
rlenv = onev / lenv;
for (int row = rady+1; row < H-rady; row++) {
tempv = tempv + (LVF(temp[(row+rady)*W+col])- LVF(temp[(row-rady-1)*W+col]))*rlenv;
_mm_store_ps( &dst[row*W+col], tempv);
tempv = tempv + (LVFU(temp[(row+rady)*W+col])- LVFU(temp[(row-rady-1)*W+col]))*rlenv;
_mm_storeu_ps( &dst[row*W+col], tempv);
}
for (int row=H-rady; row<H; row++) {
lenm1v = lenv - onev;
tempv = (tempv*lenv - LVF(temp[(row-rady-1)*W+col]))/lenm1v;
_mm_store_ps( &dst[row*W+col], tempv);
tempv = (tempv*lenv - LVFU(temp[(row-rady-1)*W+col]))/lenm1v;
_mm_storeu_ps( &dst[row*W+col], tempv);
lenv = lenm1v;
}
}