Removed the SSE2 streaming-store (_mm_stream_ps) zeroing code because, after adding _mm_mfence(), it was no faster than the non-SSE memset

2016-01-26 23:27:52 +01:00
parent ee665d6790
commit ded93005d9
1 changed files with 0 additions and 12 deletions
--- a/rtengine/amaze_demosaic_RT.cc
+++ b/rtengine/amaze_demosaic_RT.cc
@@ -181,17 +181,7 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw,

        for (int top = winy - 16; top < winy + height; top += ts - 32) {
            for (int left = winx - 16; left < winx + width; left += ts - 32) {
-#ifdef __SSE2__
-                // Using SSE2 we can zero the memory without cache pollution
-                vfloat zerov = ZEROV;
-
-                for(int i = 3 * tsh; i < (ts - 6)*tsh; i += 16) {
-                    _mm_stream_ps((float*)&nyquist[i], zerov);
-                }
-
-#else
                memset(&nyquist[3 * tsh], 0, sizeof(unsigned char) * (ts - 6) * tsh);
-#endif
                //location of tile bottom edge
                int bottom = min(top + ts, winy + height + 16);
                //location of tile right edge
@@ -983,8 +973,6 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw,
                // refine Nyquist areas using G curvatures
                if(doNyquist) {
                    for (int rr = nystartrow; rr < nyendrow; rr++)
-
-                        // TODO_INGO: maybe this part is also worth vectorizing using _mm_movemask_ps
                        for (int indx = rr * ts + nystartcol + (FC(rr, 2) & 1); indx < rr * ts + nyendcol; indx += 2) {

                            if (nyquist2[indx >> 1]) {