LCMS performance optimizations

This commit is contained in:
Oliver Duis 2011-04-09 17:49:44 +02:00
parent 31791b268f
commit 6024cb817a
6 changed files with 84 additions and 44 deletions

View File

@ -271,3 +271,10 @@ Image16::tofloat() const
}
return imgfloat;
}
// Parallelized in-place colour transform: cmsDoTransform is invoked once per
// image row and the rows are shared out across OpenMP threads.
// The same transform handle is used by every thread, so it must have been
// created with cmsFLAGS_NOCACHE.
// NOTE(review): each call addresses the slice starting at
// data + 3*row*rowstride and passes rowstride as the size argument; confirm
// this matches the buffer layout and the pixel format the transform was
// created with.
void Image16::ExecCMSTransform(cmsHTRANSFORM hTransform) {
    #pragma omp parallel for
    for (int row = 0; row < height; ++row) {
        // Input and output point at the same slice, so the data is converted in place.
        cmsDoTransform(hTransform,
                       data + 3 * row * rowstride,
                       data + 3 * row * rowstride,
                       rowstride);
    }
}

View File

@ -58,7 +58,7 @@ class Image16 : public ImageIO, public IImage16 {
Image16* copy ();
Image8* to8() const;
Imagefloat* tofloat() const;
Imagefloat* tofloat() const;
Image16* rotate (int deg);
Image16* hflip ();
@ -87,6 +87,8 @@ class Image16 : public ImageIO, public IImage16 {
virtual unsigned short** getRPlane () { return r; }
virtual unsigned short** getGPlane () { return g; }
virtual unsigned short** getBPlane () { return b; }
};
void ExecCMSTransform(cmsHTRANSFORM hTransform);
};
};
#endif

View File

@ -21,6 +21,7 @@
#include <image8.h>
#include <string.h>
#include <rtengine.h>
#include <mytime.h>
using namespace rtengine;
@ -234,4 +235,9 @@ Imagefloat::to16() const
return img16;
}
// Parallelized in-place colour transform: cmsDoTransform is invoked once per
// image row and the rows are shared out across OpenMP threads.
// The same transform handle is used by every thread, so it must have been
// created with cmsFLAGS_NOCACHE.
// NOTE(review): each call addresses the slice starting at
// data + 3*row*rowstride and passes rowstride as the size argument; confirm
// this matches the buffer layout and the pixel format the transform was
// created with.
void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) {
    #pragma omp parallel for
    for (int row = 0; row < height; ++row) {
        // Input and output point at the same slice, so the data is converted in place.
        cmsDoTransform(hTransform,
                       data + 3 * row * rowstride,
                       data + 3 * row * rowstride,
                       rowstride);
    }
}

View File

@ -58,8 +58,8 @@ class Imagefloat : public ImageIO, public IImagefloat {
Imagefloat* copy ();
Image8* to8() const;
Image16* to16() const;
Image8* to8() const;
Image16* to16() const;
Imagefloat* rotate (int deg);
@ -70,25 +70,27 @@ class Imagefloat : public ImageIO, public IImagefloat {
virtual int getW () { return width; }
virtual int getH () { return height; }
virtual void allocate (int width, int height);
virtual int getBPS () { return 8*sizeof(float); }
//virtual void getScanline (int row, unsigned char* buffer, int bps);
//virtual void setScanline (int row, unsigned char* buffer, int bps);
virtual int getBPS () { return 8*sizeof(float); }
//virtual void getScanline (int row, unsigned char* buffer, int bps);
//virtual void setScanline (int row, unsigned char* buffer, int bps);
// functions inherited from IImagefloat:
virtual int getWidth () { return width; }
virtual int getHeight () { return height; }
virtual Glib::Mutex& getMutex () { return mutex (); }
virtual cmsHPROFILE getProfile () { return getEmbeddedProfile (); }
virtual int getBitsPerPixel () { return 16; }
virtual int saveToFile (Glib::ustring fname) { return save (fname); }
virtual int saveAsPNG (Glib::ustring fname, int compression = -1, int bps = -1) { return savePNG (fname, compression, bps); }
virtual int saveAsJPEG (Glib::ustring fname, int quality = 100) { return saveJPEG (fname, quality); }
virtual int saveAsTIFF (Glib::ustring fname, int bps = -1, bool uncompressed = false) { return saveTIFF (fname, bps, uncompressed); }
virtual void setSaveProgressListener (ProgressListener* pl) { return setProgressListener (pl); }
virtual Glib::Mutex& getMutex () { return mutex (); }
virtual cmsHPROFILE getProfile () { return getEmbeddedProfile (); }
virtual int getBitsPerPixel () { return 16; }
virtual int saveToFile (Glib::ustring fname) { return save (fname); }
virtual int saveAsPNG (Glib::ustring fname, int compression = -1, int bps = -1) { return savePNG (fname, compression, bps); }
virtual int saveAsJPEG (Glib::ustring fname, int quality = 100) { return saveJPEG (fname, quality); }
virtual int saveAsTIFF (Glib::ustring fname, int bps = -1, bool uncompressed = false) { return saveTIFF (fname, bps, uncompressed); }
virtual void setSaveProgressListener (ProgressListener* pl) { return setProgressListener (pl); }
virtual void free () { delete this; }
virtual float** getRPlane () { return r; }
virtual float** getGPlane () { return g; }
virtual float** getBPlane () { return b; }
};
void ExecCMSTransform(cmsHTRANSFORM hTransform);
};
};
#endif

View File

@ -1574,7 +1574,7 @@ void RawImageSource::colorSpaceConversion (Imagefloat* im, ColorManagementParams
for (int k=0; k<3; k++)
mat[i][j] += work[i][k] * camMatrix[k][j]; // rgb_xyz * xyz_cam
#pragma omp parallel for
#pragma omp parallel for
for (int i=0; i<im->height; i++)
for (int j=0; j<im->width; j++) {
@ -1587,8 +1587,9 @@ void RawImageSource::colorSpaceConversion (Imagefloat* im, ColorManagementParams
im->b[i][j] = (newb);
}
} else {// use supplied input profile
//color space transform is expecting data in the range (0,1)
} else {
// use supplied input profile
// color space transform is expecting data in the range (0,1)
for ( int h = 0; h < im->height; ++h )
for ( int w = 0; w < im->width; ++w ) {
im->r[h][w] /= 65535.0f ;
@ -1597,14 +1598,19 @@ void RawImageSource::colorSpaceConversion (Imagefloat* im, ColorManagementParams
}
out = iccStore->workingSpace (cmp.working);
// out = iccStore->workingSpaceGamma (wProfile);
lcmsMutex->lock ();
cmsHTRANSFORM hTransform = cmsCreateTransform (in, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent, 0);
cmsHTRANSFORM hTransform = cmsCreateTransform (in, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent,
cmsFLAGS_NOCACHE ); // NOCACHE is important for thread safety
lcmsMutex->unlock ();
if (hTransform) {//there is an input profile
if (hTransform) {
// there is an input profile
if (cmp.gammaOnInput) {
float gd = pow (2.0, defgain);
defgain = 0.0;// Writeback defgain to be 0.0
#pragma omp parallel for
defgain = 0.0; // Writeback defgain to be 0.0
#pragma omp parallel for
for (int i=0; i<im->height; i++)
for (int j=0; j<im->width; j++) {
//TODO: extend beyond 65535
@ -1613,21 +1619,28 @@ void RawImageSource::colorSpaceConversion (Imagefloat* im, ColorManagementParams
im->b[i][j] = CurveFactory::gamma (CLIP(gd*im->b[i][j]));
}
}
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
} else {//create the profile
im->ExecCMSTransform(hTransform);
} else {
// create the profile from camera
lcmsMutex->lock ();
hTransform = cmsCreateTransform (camprofile, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent, cmsFLAGS_NOOPTIMIZE);
hTransform = cmsCreateTransform (camprofile, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent,
cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE ); // NOCACHE is important for thread safety
lcmsMutex->unlock ();
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
im->ExecCMSTransform(hTransform);
}
//restore normalization to the range (0,65535)
cmsDeleteTransform(hTransform);
// restore normalization to the range (0,65535)
#pragma omp parallel for
for ( int h = 0; h < im->height; ++h )
for ( int w = 0; w < im->width; ++w ) {
im->r[h][w] *= 65535.0 ;
im->g[h][w] *= 65535.0 ;
im->b[h][w] *= 65535.0 ;
}
cmsDeleteTransform(hTransform);
}
t3.set ();
// printf ("ICM TIME: %d\n", t3.etime(t1));
@ -1642,9 +1655,8 @@ void RawImageSource::colorSpaceConversion16 (Image16* im, ColorManagementParams
if (cmp.input == "(none)")
return;
MyTime t1, t2, t3;
t1.set ();
//MyTime t1, t2, t3;
//t1.set ();
cmsHPROFILE in;
cmsHPROFILE out;
@ -1697,13 +1709,15 @@ void RawImageSource::colorSpaceConversion16 (Image16* im, ColorManagementParams
out = iccStore->workingSpace (cmp.working);
// out = iccStore->workingSpaceGamma (wProfile);
lcmsMutex->lock ();
cmsHTRANSFORM hTransform = cmsCreateTransform (in, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, 0);
cmsHTRANSFORM hTransform = cmsCreateTransform (in, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, cmsFLAGS_NOCACHE); // NOCACHE is important for thread safety
lcmsMutex->unlock ();
if (hTransform) {
if (cmp.gammaOnInput) {
float gd = pow (2.0, defgain);
defgain = 0.0;
#pragma omp parallel for
#pragma omp parallel for
for (int i=0; i<im->height; i++)
for (int j=0; j<im->width; j++) {
im->r[i][j] = CurveFactory::gamma ((gd*im->r[i][j]));
@ -1711,17 +1725,21 @@ void RawImageSource::colorSpaceConversion16 (Image16* im, ColorManagementParams
im->b[i][j] = CurveFactory::gamma ((gd*im->b[i][j]));
}
}
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
im->ExecCMSTransform(hTransform);
}
else {
lcmsMutex->lock ();
hTransform = cmsCreateTransform (camprofile, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, 0);
hTransform = cmsCreateTransform (camprofile, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, cmsFLAGS_NOCACHE);
lcmsMutex->unlock ();
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
im->ExecCMSTransform(hTransform);
}
cmsDeleteTransform(hTransform);
}
t3.set ();
//t3.set ();
// printf ("ICM TIME: %d\n", t3.etime(t1));
}

View File

@ -315,10 +315,13 @@ void StdImageSource::colorSpaceConversion (Imagefloat* im, ColorManagementParams
if (cmp.input!="(none)") {
lcmsMutex->lock ();
cmsHTRANSFORM hTransform = cmsCreateTransform (in, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent, cmsFLAGS_NOOPTIMIZE);
cmsHTRANSFORM hTransform = cmsCreateTransform (in, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), out, (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|PLANAR_SH(1)), settings->colorimetricIntent,
cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE);
lcmsMutex->unlock ();
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
cmsDeleteTransform(hTransform);
im->ExecCMSTransform(hTransform);
cmsDeleteTransform(hTransform);
}
}
@ -350,9 +353,11 @@ void StdImageSource::colorSpaceConversion16 (Image16* im, ColorManagementParams
if (cmp.input!="(none)") {
lcmsMutex->lock ();
cmsHTRANSFORM hTransform = cmsCreateTransform (in, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, 0);
cmsHTRANSFORM hTransform = cmsCreateTransform (in, TYPE_RGB_16_PLANAR, out, TYPE_RGB_16_PLANAR, settings->colorimetricIntent, cmsFLAGS_NOCACHE);
lcmsMutex->unlock ();
cmsDoTransform (hTransform, im->data, im->data, im->planestride);
im->ExecCMSTransform(hTransform);
cmsDeleteTransform(hTransform);
}
}