Merge pull request #3282 from Floessie/dedusting-haldclut-fix

HaldCLUT cleanups after the dust (#3154) has settled
This commit is contained in:
Ingo Weyrich 2016-05-14 11:57:56 +02:00
commit 298a8f417f
2 changed files with 46 additions and 32 deletions

View File

@ -56,7 +56,7 @@ bool loadFile(
img_src.convertColorSpace(img_float.get(), icm, curr_wb); img_src.convertColorSpace(img_float.get(), icm, curr_wb);
} }
AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 8); // + 8 because of SSE4_1 version of getClutValue AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 4); // getClutValues() loads one pixel in advance
std::size_t index = 0; std::size_t index = 0;
@ -78,12 +78,30 @@ bool loadFile(
} }
#ifdef __SSE2__ #ifdef __SSE2__
vfloat getClutValue(const AlignedBuffer<std::uint16_t>& clut_image, size_t index) vfloat2 getClutValues(const AlignedBuffer<std::uint16_t>& clut_image, size_t index)
{ {
const vint v_values = _mm_loadu_si128(reinterpret_cast<const vint*>(clut_image.data + index));
#ifdef __SSE4_1__ #ifdef __SSE4_1__
return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(clut_image.data + index)))); return {
_mm_cvtepi32_ps(_mm_cvtepu16_epi32(v_values)),
_mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_srli_si128(v_values, 8)))
};
#else #else
return _mm_set_ps(clut_image.data[index + 3], clut_image.data[index + 2], clut_image.data[index + 1], clut_image.data[index]); const vint v_mask = _mm_set1_epi32(0x0000FFFF);
vint v_low = _mm_shuffle_epi32(v_values, _MM_SHUFFLE(1, 0, 1, 0));
vint v_high = _mm_shuffle_epi32(v_values, _MM_SHUFFLE(3, 2, 3, 2));
v_low = _mm_shufflelo_epi16(v_low, _MM_SHUFFLE(1, 1, 0, 0));
v_high = _mm_shufflelo_epi16(v_high, _MM_SHUFFLE(1, 1, 0, 0));
v_low = _mm_shufflehi_epi16(v_low, _MM_SHUFFLE(3, 3, 2, 2));
v_high = _mm_shufflehi_epi16(v_high, _MM_SHUFFLE(3, 3, 2, 2));
v_low = vandm(v_low, v_mask);
v_high = vandm(v_high, v_mask);
return {
_mm_cvtepi32_ps(v_low),
_mm_cvtepi32_ps(v_high)
};
#endif #endif
} }
#endif #endif
@ -212,11 +230,13 @@ void rtengine::HaldCLUT::getRGB(
const vfloat v_r = PERMUTEPS(v_rgb, _MM_SHUFFLE(0, 0, 0, 0)); const vfloat v_r = PERMUTEPS(v_rgb, _MM_SHUFFLE(0, 0, 0, 0));
vfloat v_tmp1 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); vfloat2 v_clut_values = getClutValues(clut_image, index);
vfloat v_tmp1 = vintpf(v_r, v_clut_values.y, v_clut_values.x);
index = (color + level) * 4; index = (color + level) * 4;
vfloat v_tmp2 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); v_clut_values = getClutValues(clut_image, index);
vfloat v_tmp2 = vintpf(v_r, v_clut_values.y, v_clut_values.x);
const vfloat v_g = PERMUTEPS(v_rgb, _MM_SHUFFLE(1, 1, 1, 1)); const vfloat v_g = PERMUTEPS(v_rgb, _MM_SHUFFLE(1, 1, 1, 1));
@ -224,11 +244,13 @@ void rtengine::HaldCLUT::getRGB(
index = (color + level_square) * 4; index = (color + level_square) * 4;
v_tmp1 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); v_clut_values = getClutValues(clut_image, index);
v_tmp1 = vintpf(v_r, v_clut_values.y, v_clut_values.x);
index = (color + level + level_square) * 4; index = (color + level + level_square) * 4;
v_tmp2 = vintpf(v_r, getClutValue(clut_image, index + 4), getClutValue(clut_image, index)); v_clut_values = getClutValues(clut_image, index);
v_tmp2 = vintpf(v_r, v_clut_values.y, v_clut_values.x);
v_tmp1 = vintpf(v_g, v_tmp2, v_tmp1); v_tmp1 = vintpf(v_g, v_tmp2, v_tmp1);
@ -250,12 +272,6 @@ void rtengine::HaldCLUT::splitClutFilename(
{ {
Glib::ustring basename = Glib::path_get_basename(filename); Glib::ustring basename = Glib::path_get_basename(filename);
Glib::ustring::size_type last_slash_pos = basename.rfind('/');
if (last_slash_pos == Glib::ustring::npos) {
last_slash_pos = basename.rfind('\\');
}
const Glib::ustring::size_type last_dot_pos = basename.rfind('.'); const Glib::ustring::size_type last_dot_pos = basename.rfind('.');
if (last_dot_pos != Glib::ustring::npos) { if (last_dot_pos != Glib::ustring::npos) {

View File

@ -3226,8 +3226,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
xyz2clut = iccStore->workingSpaceInverseMatrix( hald_clut->getProfile() ); xyz2clut = iccStore->workingSpaceInverseMatrix( hald_clut->getProfile() );
xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working ); xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working );
clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() ); clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() );
#ifdef __SSE2__
#ifdef __SSE2__
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) { for (int j = 0; j < 3; ++j) {
v_work2xyz[i][j] = F2V(work2xyz[i][j]); v_work2xyz[i][j] = F2V(work2xyz[i][j]);
@ -3236,8 +3236,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
v_clut2xyz[i][j] = F2V(clut2xyz[i][j]); v_clut2xyz[i][j] = F2V(clut2xyz[i][j]);
} }
} }
#endif #endif
} }
} }
} }
@ -4361,12 +4361,12 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
// Convert from working to clut profile // Convert from working to clut profile
int j = jstart; int j = jstart;
int tj = 0; int tj = 0;
#ifdef __SSE2__
#ifdef __SSE2__
for (; j < tW - 3; j += 4, tj += 4) { for (; j < tW - 3; j += 4, tj += 4) {
vfloat sourceR = LVFU(rtemp[ti * TS + tj]); vfloat sourceR = LVF(rtemp[ti * TS + tj]);
vfloat sourceG = LVFU(gtemp[ti * TS + tj]); vfloat sourceG = LVF(gtemp[ti * TS + tj]);
vfloat sourceB = LVFU(btemp[ti * TS + tj]); vfloat sourceB = LVF(btemp[ti * TS + tj]);
vfloat x; vfloat x;
vfloat y; vfloat y;
@ -4374,11 +4374,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz); Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz);
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut); Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut);
STVFU(rtemp[ti * TS + tj], sourceR); STVF(rtemp[ti * TS + tj], sourceR);
STVFU(gtemp[ti * TS + tj], sourceG); STVF(gtemp[ti * TS + tj], sourceG);
STVFU(btemp[ti * TS + tj], sourceB); STVF(btemp[ti * TS + tj], sourceB);
} }
#endif #endif
for (; j < tW; j++, tj++) { for (; j < tW; j++, tj++) {
@ -4428,12 +4427,12 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
// Convert from clut to working profile // Convert from clut to working profile
int j = jstart; int j = jstart;
int tj = 0; int tj = 0;
#ifdef __SSE2__
#ifdef __SSE2__
for (; j < tW - 3; j += 4, tj += 4) { for (; j < tW - 3; j += 4, tj += 4) {
vfloat sourceR = LVFU(rtemp[ti * TS + tj]); vfloat sourceR = LVF(rtemp[ti * TS + tj]);
vfloat sourceG = LVFU(gtemp[ti * TS + tj]); vfloat sourceG = LVF(gtemp[ti * TS + tj]);
vfloat sourceB = LVFU(btemp[ti * TS + tj]); vfloat sourceB = LVF(btemp[ti * TS + tj]);
vfloat x; vfloat x;
vfloat y; vfloat y;
@ -4441,11 +4440,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz); Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz);
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work); Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work);
STVFU(rtemp[ti * TS + tj], sourceR); STVF(rtemp[ti * TS + tj], sourceR);
STVFU(gtemp[ti * TS + tj], sourceG); STVF(gtemp[ti * TS + tj], sourceG);
STVFU(btemp[ti * TS + tj], sourceB); STVF(btemp[ti * TS + tj], sourceB);
} }
#endif #endif
for (; j < tW; j++, tj++) { for (; j < tW; j++, tj++) {