Some further cleanup of code (mainly using macros for SSE to increase readability)
This commit is contained in:
@@ -1408,6 +1408,7 @@ SSEFUNCTION int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thres
|
||||
|
||||
#ifdef __SSE2__
|
||||
// sum up 5*4 = 20 values using SSE
|
||||
// 10 fabs function calls and float 10 additions with SSE
|
||||
vfloat sum = vabsf(LVFU(cfablur[(rr - 2) * W + cc - 2])) + vabsf(LVFU(cfablur[(rr - 1) * W + cc - 2]));
|
||||
sum += vabsf(LVFU(cfablur[(rr) * W + cc - 2]));
|
||||
sum += vabsf(LVFU(cfablur[(rr + 1) * W + cc - 2]));
|
||||
@@ -1421,7 +1422,7 @@ SSEFUNCTION int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thres
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// 25 fabs function calls and 25 float additions without SSE
|
||||
for (int mm = rr - 2; mm <= rr + 2; mm++) {
|
||||
for (int nn = cc - 2; nn <= cc + 2; nn++) {
|
||||
hfnbrave += fabsf(cfablur[mm * W + nn]);
|
||||
@@ -2099,11 +2100,11 @@ void RawImageSource::retinexPrepareBuffers(ColorManagementParams cmp, RetinexPar
|
||||
vfloat H, S, L;
|
||||
int pos;
|
||||
Color::rgb2hsl(LVFU(red[i][j]), LVFU(green[i][j]), LVFU(blue[i][j]), H, S, L);
|
||||
_mm_storeu_ps(&conversionBuffer[0][i - border][j - border], H);
|
||||
_mm_storeu_ps(&conversionBuffer[1][i - border][j - border], S);
|
||||
STVFU(conversionBuffer[0][i - border][j - border], H);
|
||||
STVFU(conversionBuffer[1][i - border][j - border], S);
|
||||
L *= c32768;
|
||||
_mm_storeu_ps(&conversionBuffer[2][i - border][j - border], L);
|
||||
_mm_storeu_ps(&conversionBuffer[3][i - border][j - border], H);
|
||||
STVFU(conversionBuffer[2][i - border][j - border], L);
|
||||
STVFU(conversionBuffer[3][i - border][j - border], H);
|
||||
|
||||
if(lhist16RETI) {
|
||||
for(int p = 0; p < 4; p++) {
|
||||
@@ -2423,9 +2424,9 @@ void RawImageSource::retinex(ColorManagementParams cmp, RetinexParams deh, ToneC
|
||||
vfloat R, G, B;
|
||||
Color::hsl2rgb(LVFU(conversionBuffer[0][i - border][j - border]), LVFU(conversionBuffer[1][i - border][j - border]), LVFU(LBuffer[i - border][j - border]) / c32768, R, G, B);
|
||||
|
||||
_mm_storeu_ps(&red[i][j], R);
|
||||
_mm_storeu_ps(&green[i][j], G);
|
||||
_mm_storeu_ps(&blue[i][j], B);
|
||||
STVFU(red[i][j], R);
|
||||
STVFU(green[i][j], G);
|
||||
STVFU(blue[i][j], B);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2510,9 +2511,9 @@ void RawImageSource::retinex(ColorManagementParams cmp, RetinexParams deh, ToneC
|
||||
Color::Lab2XYZ(LVFU(LBuffer[i - border][j - border]), LVFU(conversionBuffer[0][i - border][j - border]), LVFU(conversionBuffer[1][i - border][j - border]), x_, y_, z_) ;
|
||||
Color::xyz2rgb(x_, y_, z_, R, G, B, wipv);
|
||||
|
||||
_mm_storeu_ps(&red[i][j], R);
|
||||
_mm_storeu_ps(&green[i][j], G);
|
||||
_mm_storeu_ps(&blue[i][j], B);
|
||||
STVFU(red[i][j], R);
|
||||
STVFU(green[i][j], G);
|
||||
STVFU(blue[i][j], B);
|
||||
|
||||
}
|
||||
|
||||
@@ -3034,9 +3035,9 @@ SSEFUNCTION void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur
|
||||
|
||||
//vertical blur
|
||||
#ifdef __SSE2__
|
||||
__m128 leninitv = _mm_set1_ps( (float)((int)(boxH / 2 + 1)));
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
__m128 temp1v, temp2v, lenv, lenp1v, lenm1v;
|
||||
vfloat leninitv = F2V( (float)((int)(boxH / 2 + 1)));
|
||||
vfloat onev = F2V( 1.0f );
|
||||
vfloat temp1v, temp2v, lenv, lenp1v, lenm1v;
|
||||
int row;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@@ -3052,29 +3053,29 @@ SSEFUNCTION void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur
|
||||
temp2v += LVFU(cfatmp[(i + 1) * W + col]) / lenv;
|
||||
}
|
||||
|
||||
_mm_storeu_ps(&cfablur[0 * W + col], temp1v);
|
||||
_mm_storeu_ps(&cfablur[1 * W + col], temp2v);
|
||||
STVFU(cfablur[0 * W + col], temp1v);
|
||||
STVFU(cfablur[1 * W + col], temp2v);
|
||||
|
||||
for (row = 2; row < boxH + 2; row += 2) {
|
||||
lenp1v = lenv + onev;
|
||||
temp1v = (temp1v * lenv + LVFU(cfatmp[(row + boxH) * W + col])) / lenp1v;
|
||||
temp2v = (temp2v * lenv + LVFU(cfatmp[(row + boxH + 1) * W + col])) / lenp1v;
|
||||
_mm_storeu_ps( &cfablur[row * W + col], temp1v);
|
||||
_mm_storeu_ps( &cfablur[(row + 1)*W + col], temp2v);
|
||||
STVFU(cfablur[row * W + col], temp1v);
|
||||
STVFU(cfablur[(row + 1)*W + col], temp2v);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
for (; row < H - boxH - 1; row += 2) {
|
||||
temp1v = temp1v + (LVFU(cfatmp[(row + boxH) * W + col]) - LVFU(cfatmp[(row - boxH - 2) * W + col])) / lenv;
|
||||
temp2v = temp2v + (LVFU(cfatmp[(row + 1 + boxH) * W + col]) - LVFU(cfatmp[(row + 1 - boxH - 2) * W + col])) / lenv;
|
||||
_mm_storeu_ps(&cfablur[row * W + col], temp1v);
|
||||
_mm_storeu_ps(&cfablur[(row + 1)*W + col], temp2v);
|
||||
STVFU(cfablur[row * W + col], temp1v);
|
||||
STVFU(cfablur[(row + 1)*W + col], temp2v);
|
||||
}
|
||||
|
||||
for(; row < H - boxH; row++) {
|
||||
temp1v = temp1v + (LVFU(cfatmp[(row + boxH) * W + col]) - LVFU(cfatmp[(row - boxH - 2) * W + col])) / lenv;
|
||||
_mm_storeu_ps(&cfablur[row * W + col], temp1v);
|
||||
__m128 swapv = temp1v;
|
||||
STVFU(cfablur[row * W + col], temp1v);
|
||||
vfloat swapv = temp1v;
|
||||
temp1v = temp2v;
|
||||
temp2v = swapv;
|
||||
|
||||
@@ -3084,15 +3085,15 @@ SSEFUNCTION void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur
|
||||
lenm1v = lenv - onev;
|
||||
temp1v = (temp1v * lenv - LVFU(cfatmp[(row - boxH - 2) * W + col])) / lenm1v;
|
||||
temp2v = (temp2v * lenv - LVFU(cfatmp[(row - boxH - 1) * W + col])) / lenm1v;
|
||||
_mm_storeu_ps(&cfablur[row * W + col], temp1v);
|
||||
_mm_storeu_ps(&cfablur[(row + 1)*W + col], temp2v);
|
||||
STVFU(cfablur[row * W + col], temp1v);
|
||||
STVFU(cfablur[(row + 1)*W + col], temp2v);
|
||||
lenv = lenm1v;
|
||||
}
|
||||
|
||||
for(; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
temp1v = (temp1v * lenv - LVFU(cfatmp[(row - boxH - 2) * W + col])) / lenm1v;
|
||||
_mm_storeu_ps(&cfablur[(row)*W + col], temp1v);
|
||||
STVFU(cfablur[(row)*W + col], temp1v);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user