Merge branch 'master' into gtk3

This commit is contained in:
Beep6581 2016-05-06 18:51:21 +02:00
commit 0074a5d429
2 changed files with 34 additions and 24 deletions

View File

@ -56,7 +56,7 @@ bool loadFile(
img_src.convertColorSpace(img_float.get(), icm, curr_wb);
}
AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 4);
AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 8); // + 8 because of SSE4_1 version of getClutValue
std::size_t index = 0;
@ -81,9 +81,9 @@ bool loadFile(
vfloat getClutValue(const AlignedBuffer<std::uint16_t>& clut_image, size_t index)
{
#ifdef __SSE4_1__
return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(*reinterpret_cast<const __m128i*>(clut_image.data + index)));
return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(clut_image.data + index))));
#else
return _mm_cvtpu16_ps(*reinterpret_cast<const __m64*>(clut_image.data + index));
return _mm_set_ps(clut_image.data[index + 3], clut_image.data[index + 2], clut_image.data[index + 1], clut_image.data[index]);
#endif
}
#endif
@ -205,8 +205,8 @@ void rtengine::HaldCLUT::getRGB(
out_rgbx[2] = intp<float>(strength, out_rgbx[2], *b);
#else
const vfloat v_in = _mm_set_ps(0.0f, *b, *g, *r);
const vfloat v_tmp = v_in * _mm_load_ps1(&flevel_minus_one);
const vfloat v_rgb = v_tmp - _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_min_ps(_mm_load_ps1(&flevel_minus_two), v_tmp)));
const vfloat v_tmp = v_in * F2V(flevel_minus_one);
const vfloat v_rgb = v_tmp - _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_min_ps(F2V(flevel_minus_two), v_tmp)));
size_t index = color * 4;

View File

@ -3209,10 +3209,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
bool clutAndWorkingProfilesAreSame = false;
TMatrix work2xyz, xyz2clut, clut2xyz, xyz2work;
#ifdef __SSE2__
vfloat v_work2xyz[3][3];
vfloat v_xyz2clut[3][3];
vfloat v_clut2xyz[3][3];
vfloat v_xyz2work[3][3];
vfloat v_work2xyz[3][3] ALIGNED16;
vfloat v_xyz2clut[3][3] ALIGNED16;
vfloat v_clut2xyz[3][3] ALIGNED16;
vfloat v_xyz2work[3][3] ALIGNED16;
#endif
if ( params->filmSimulation.enabled && !params->filmSimulation.clutFilename.empty() ) {
@ -3227,6 +3227,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working );
clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() );
#ifdef __SSE2__
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
v_work2xyz[i][j] = F2V(work2xyz[i][j]);
@ -3235,6 +3236,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
v_clut2xyz[i][j] = F2V(clut2xyz[i][j]);
}
}
#endif
}
}
@ -3452,6 +3454,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
editWhateverTmp = (float(*))data;
}
float out_rgbx[4 * TS] ALIGNED16; // Line buffer for CLUT
#ifdef _OPENMP
#pragma omp for schedule(dynamic) collapse(2)
@ -4352,8 +4355,6 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
// Film Simulations
if (hald_clut) {
float out_rgbx[4 * TS] ALIGNED16;
for (int i = istart, ti = 0; i < tH; i++, ti++) {
if (!clutAndWorkingProfilesAreSame) {
@ -4361,10 +4362,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
int j = jstart;
int tj = 0;
#ifdef __SSE2__
for (; j < tW - 3; j += 4, tj += 4) {
vfloat sourceR = LVF(rtemp[ti * TS + tj]);
vfloat sourceG = LVF(gtemp[ti * TS + tj]);
vfloat sourceB = LVF(btemp[ti * TS + tj]);
vfloat sourceR = LVFU(rtemp[ti * TS + tj]);
vfloat sourceG = LVFU(gtemp[ti * TS + tj]);
vfloat sourceB = LVFU(btemp[ti * TS + tj]);
vfloat x;
vfloat y;
@ -4372,11 +4374,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz);
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut);
STVF(rtemp[ti * TS + tj], sourceR);
STVF(gtemp[ti * TS + tj], sourceG);
STVF(btemp[ti * TS + tj], sourceB);
STVFU(rtemp[ti * TS + tj], sourceR);
STVFU(gtemp[ti * TS + tj], sourceG);
STVFU(btemp[ti * TS + tj], sourceB);
}
#endif
for (; j < tW; j++, tj++) {
float &sourceR = rtemp[ti * TS + tj];
float &sourceG = gtemp[ti * TS + tj];
@ -4425,10 +4429,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
int j = jstart;
int tj = 0;
#ifdef __SSE2__
for (; j < tW - 3; j += 4, tj += 4) {
vfloat sourceR = LVF(rtemp[ti * TS + tj]);
vfloat sourceG = LVF(gtemp[ti * TS + tj]);
vfloat sourceB = LVF(btemp[ti * TS + tj]);
vfloat sourceR = LVFU(rtemp[ti * TS + tj]);
vfloat sourceG = LVFU(gtemp[ti * TS + tj]);
vfloat sourceB = LVFU(btemp[ti * TS + tj]);
vfloat x;
vfloat y;
@ -4436,11 +4441,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz);
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work);
STVF(rtemp[ti * TS + tj], sourceR);
STVF(gtemp[ti * TS + tj], sourceG);
STVF(btemp[ti * TS + tj], sourceB);
STVFU(rtemp[ti * TS + tj], sourceR);
STVFU(gtemp[ti * TS + tj], sourceG);
STVFU(btemp[ti * TS + tj], sourceB);
}
#endif
for (; j < tW; j++, tj++) {
float &sourceR = rtemp[ti * TS + tj];
float &sourceG = gtemp[ti * TS + tj];
@ -7185,6 +7192,7 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
wipv[i][j] = F2V(wiprof[i][j]);
}
}
#endif
#ifdef _OPENMP
@ -7194,9 +7202,10 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
for(int i = 0; i < H; i++) {
int j = 0;
#ifdef __SSE2__
for(; j < W - 3; j += 4) {
vfloat X, Y, Z;
vfloat R,G,B;
vfloat R, G, B;
Color::Lab2XYZ(LVFU(src.L[i][j]), LVFU(src.a[i][j]), LVFU(src.b[i][j]), X, Y, Z);
Color::xyz2rgb(X, Y, Z, R, G, B, wipv);
STVFU(dst.r(i, j), R);
@ -7205,6 +7214,7 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
}
#endif
for(; j < W; j++) {
float X, Y, Z;
Color::Lab2XYZ(src.L[i][j], src.a[i][j], src.b[i][j], X, Y, Z);