Merge branch 'master' into gtk3
This commit is contained in:
commit
0074a5d429
@ -56,7 +56,7 @@ bool loadFile(
|
|||||||
img_src.convertColorSpace(img_float.get(), icm, curr_wb);
|
img_src.convertColorSpace(img_float.get(), icm, curr_wb);
|
||||||
}
|
}
|
||||||
|
|
||||||
AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 4);
|
AlignedBuffer<std::uint16_t> image(fw * fh * 4 + 8); // + 8 because of SSE4_1 version of getClutValue
|
||||||
|
|
||||||
std::size_t index = 0;
|
std::size_t index = 0;
|
||||||
|
|
||||||
@ -81,9 +81,9 @@ bool loadFile(
|
|||||||
vfloat getClutValue(const AlignedBuffer<std::uint16_t>& clut_image, size_t index)
|
vfloat getClutValue(const AlignedBuffer<std::uint16_t>& clut_image, size_t index)
|
||||||
{
|
{
|
||||||
#ifdef __SSE4_1__
|
#ifdef __SSE4_1__
|
||||||
return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(*reinterpret_cast<const __m128i*>(clut_image.data + index)));
|
return _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(clut_image.data + index))));
|
||||||
#else
|
#else
|
||||||
return _mm_cvtpu16_ps(*reinterpret_cast<const __m64*>(clut_image.data + index));
|
return _mm_set_ps(clut_image.data[index + 3], clut_image.data[index + 2], clut_image.data[index + 1], clut_image.data[index]);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -205,8 +205,8 @@ void rtengine::HaldCLUT::getRGB(
|
|||||||
out_rgbx[2] = intp<float>(strength, out_rgbx[2], *b);
|
out_rgbx[2] = intp<float>(strength, out_rgbx[2], *b);
|
||||||
#else
|
#else
|
||||||
const vfloat v_in = _mm_set_ps(0.0f, *b, *g, *r);
|
const vfloat v_in = _mm_set_ps(0.0f, *b, *g, *r);
|
||||||
const vfloat v_tmp = v_in * _mm_load_ps1(&flevel_minus_one);
|
const vfloat v_tmp = v_in * F2V(flevel_minus_one);
|
||||||
const vfloat v_rgb = v_tmp - _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_min_ps(_mm_load_ps1(&flevel_minus_two), v_tmp)));
|
const vfloat v_rgb = v_tmp - _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_min_ps(F2V(flevel_minus_two), v_tmp)));
|
||||||
|
|
||||||
size_t index = color * 4;
|
size_t index = color * 4;
|
||||||
|
|
||||||
|
@ -3209,10 +3209,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
bool clutAndWorkingProfilesAreSame = false;
|
bool clutAndWorkingProfilesAreSame = false;
|
||||||
TMatrix work2xyz, xyz2clut, clut2xyz, xyz2work;
|
TMatrix work2xyz, xyz2clut, clut2xyz, xyz2work;
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
vfloat v_work2xyz[3][3];
|
vfloat v_work2xyz[3][3] ALIGNED16;
|
||||||
vfloat v_xyz2clut[3][3];
|
vfloat v_xyz2clut[3][3] ALIGNED16;
|
||||||
vfloat v_clut2xyz[3][3];
|
vfloat v_clut2xyz[3][3] ALIGNED16;
|
||||||
vfloat v_xyz2work[3][3];
|
vfloat v_xyz2work[3][3] ALIGNED16;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( params->filmSimulation.enabled && !params->filmSimulation.clutFilename.empty() ) {
|
if ( params->filmSimulation.enabled && !params->filmSimulation.clutFilename.empty() ) {
|
||||||
@ -3227,6 +3227,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working );
|
xyz2work = iccStore->workingSpaceInverseMatrix( params->icm.working );
|
||||||
clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() );
|
clut2xyz = iccStore->workingSpaceMatrix( hald_clut->getProfile() );
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
for (int j = 0; j < 3; ++j) {
|
for (int j = 0; j < 3; ++j) {
|
||||||
v_work2xyz[i][j] = F2V(work2xyz[i][j]);
|
v_work2xyz[i][j] = F2V(work2xyz[i][j]);
|
||||||
@ -3235,6 +3236,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
v_clut2xyz[i][j] = F2V(clut2xyz[i][j]);
|
v_clut2xyz[i][j] = F2V(clut2xyz[i][j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3452,6 +3454,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
editWhateverTmp = (float(*))data;
|
editWhateverTmp = (float(*))data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float out_rgbx[4 * TS] ALIGNED16; // Line buffer for CLUT
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
@ -4352,8 +4355,6 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
|
|
||||||
// Film Simulations
|
// Film Simulations
|
||||||
if (hald_clut) {
|
if (hald_clut) {
|
||||||
float out_rgbx[4 * TS] ALIGNED16;
|
|
||||||
|
|
||||||
|
|
||||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||||
if (!clutAndWorkingProfilesAreSame) {
|
if (!clutAndWorkingProfilesAreSame) {
|
||||||
@ -4361,10 +4362,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
int j = jstart;
|
int j = jstart;
|
||||||
int tj = 0;
|
int tj = 0;
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for (; j < tW - 3; j += 4, tj += 4) {
|
for (; j < tW - 3; j += 4, tj += 4) {
|
||||||
vfloat sourceR = LVF(rtemp[ti * TS + tj]);
|
vfloat sourceR = LVFU(rtemp[ti * TS + tj]);
|
||||||
vfloat sourceG = LVF(gtemp[ti * TS + tj]);
|
vfloat sourceG = LVFU(gtemp[ti * TS + tj]);
|
||||||
vfloat sourceB = LVF(btemp[ti * TS + tj]);
|
vfloat sourceB = LVFU(btemp[ti * TS + tj]);
|
||||||
|
|
||||||
vfloat x;
|
vfloat x;
|
||||||
vfloat y;
|
vfloat y;
|
||||||
@ -4372,11 +4374,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz);
|
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_work2xyz);
|
||||||
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut);
|
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2clut);
|
||||||
|
|
||||||
STVF(rtemp[ti * TS + tj], sourceR);
|
STVFU(rtemp[ti * TS + tj], sourceR);
|
||||||
STVF(gtemp[ti * TS + tj], sourceG);
|
STVFU(gtemp[ti * TS + tj], sourceG);
|
||||||
STVF(btemp[ti * TS + tj], sourceB);
|
STVFU(btemp[ti * TS + tj], sourceB);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (; j < tW; j++, tj++) {
|
for (; j < tW; j++, tj++) {
|
||||||
float &sourceR = rtemp[ti * TS + tj];
|
float &sourceR = rtemp[ti * TS + tj];
|
||||||
float &sourceG = gtemp[ti * TS + tj];
|
float &sourceG = gtemp[ti * TS + tj];
|
||||||
@ -4425,10 +4429,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
int j = jstart;
|
int j = jstart;
|
||||||
int tj = 0;
|
int tj = 0;
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for (; j < tW - 3; j += 4, tj += 4) {
|
for (; j < tW - 3; j += 4, tj += 4) {
|
||||||
vfloat sourceR = LVF(rtemp[ti * TS + tj]);
|
vfloat sourceR = LVFU(rtemp[ti * TS + tj]);
|
||||||
vfloat sourceG = LVF(gtemp[ti * TS + tj]);
|
vfloat sourceG = LVFU(gtemp[ti * TS + tj]);
|
||||||
vfloat sourceB = LVF(btemp[ti * TS + tj]);
|
vfloat sourceB = LVFU(btemp[ti * TS + tj]);
|
||||||
|
|
||||||
vfloat x;
|
vfloat x;
|
||||||
vfloat y;
|
vfloat y;
|
||||||
@ -4436,11 +4441,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz);
|
Color::rgbxyz(sourceR, sourceG, sourceB, x, y, z, v_clut2xyz);
|
||||||
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work);
|
Color::xyz2rgb(x, y, z, sourceR, sourceG, sourceB, v_xyz2work);
|
||||||
|
|
||||||
STVF(rtemp[ti * TS + tj], sourceR);
|
STVFU(rtemp[ti * TS + tj], sourceR);
|
||||||
STVF(gtemp[ti * TS + tj], sourceG);
|
STVFU(gtemp[ti * TS + tj], sourceG);
|
||||||
STVF(btemp[ti * TS + tj], sourceB);
|
STVFU(btemp[ti * TS + tj], sourceB);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (; j < tW; j++, tj++) {
|
for (; j < tW; j++, tj++) {
|
||||||
float &sourceR = rtemp[ti * TS + tj];
|
float &sourceR = rtemp[ti * TS + tj];
|
||||||
float &sourceG = gtemp[ti * TS + tj];
|
float &sourceG = gtemp[ti * TS + tj];
|
||||||
@ -7185,6 +7192,7 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
|
|||||||
wipv[i][j] = F2V(wiprof[i][j]);
|
wipv[i][j] = F2V(wiprof[i][j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
@ -7194,9 +7202,10 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
|
|||||||
for(int i = 0; i < H; i++) {
|
for(int i = 0; i < H; i++) {
|
||||||
int j = 0;
|
int j = 0;
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for(; j < W - 3; j += 4) {
|
for(; j < W - 3; j += 4) {
|
||||||
vfloat X, Y, Z;
|
vfloat X, Y, Z;
|
||||||
vfloat R,G,B;
|
vfloat R, G, B;
|
||||||
Color::Lab2XYZ(LVFU(src.L[i][j]), LVFU(src.a[i][j]), LVFU(src.b[i][j]), X, Y, Z);
|
Color::Lab2XYZ(LVFU(src.L[i][j]), LVFU(src.a[i][j]), LVFU(src.b[i][j]), X, Y, Z);
|
||||||
Color::xyz2rgb(X, Y, Z, R, G, B, wipv);
|
Color::xyz2rgb(X, Y, Z, R, G, B, wipv);
|
||||||
STVFU(dst.r(i, j), R);
|
STVFU(dst.r(i, j), R);
|
||||||
@ -7205,6 +7214,7 @@ SSEFUNCTION void ImProcFunctions::lab2rgb(const LabImage &src, Imagefloat &dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for(; j < W; j++) {
|
for(; j < W; j++) {
|
||||||
float X, Y, Z;
|
float X, Y, Z;
|
||||||
Color::Lab2XYZ(src.L[i][j], src.a[i][j], src.b[i][j], X, Y, Z);
|
Color::Lab2XYZ(src.L[i][j], src.a[i][j], src.b[i][j], X, Y, Z);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user