diff --git a/rtdata/languages/default b/rtdata/languages/default
index c45a3cac3..b0a128467 100644
--- a/rtdata/languages/default
+++ b/rtdata/languages/default
@@ -578,6 +578,7 @@ HISTORY_MSG_356;Wavelet edgedetect thresholdHi
 HISTORY_MSG_357;Wavelet Denoise link
 HISTORY_MSG_358;Wavelet Contrast Hue curve
 HISTORY_MSG_359;Hot/Dead - Threshold
+HISTORY_MSG_360;TM Gamma
 HISTORY_NEWSNAPSHOT;Add
 HISTORY_NEWSNAPSHOT_TOOLTIP;Shortcut: <b>Alt-s</b>
 HISTORY_SNAPSHOTS;Snapshots
@@ -1337,6 +1338,7 @@ TP_EPD_EDGESTOPPING;Edge stopping
 TP_EPD_LABEL;Tone Mapping
 TP_EPD_REWEIGHTINGITERATES;Reweighting iterates
 TP_EPD_SCALE;Scale
+TP_EPD_GAMMA;Gamma
 TP_EPD_STRENGTH;Strength
 TP_EPD_TOOLTIP;Tone mapping is possible via Lab mode (standard) and CIECAM02 mode.\n\nTo engage CIECAM02 tone mapping mode enable the following settings:\n1. CIECAM02\n2. Algorithm="Brightness + Colorfulness (QM)"\n3. "Tone mapping using CIECAM02 brightness (Q)"
 TP_EXPOSURE_AUTOLEVELS;Auto Levels
diff --git a/rtengine/EdgePreservingDecomposition.cc b/rtengine/EdgePreservingDecomposition.cc
index de2ca0622..6f8a30033 100644
--- a/rtengine/EdgePreservingDecomposition.cc
+++ b/rtengine/EdgePreservingDecomposition.cc
@@ -4,13 +4,13 @@
 #ifdef _OPENMP
 #include <omp.h>
 #endif
-#include "sleef.c"
-#include "opthelper.h"
-
-#define pow_F(a,b) (xexpf(b*xlogf(a)))
-
-#define DIAGONALS 5
-#define DIAGONALSP1 6
+#include "sleef.c"
+#include "opthelper.h"
+
+#define pow_F(a,b) (xexpf(b*xlogf(a)))
+
+#define DIAGONALS 5
+#define DIAGONALSP1 6
 
 /* Solves A x = b by the conjugate gradient method, where instead of feeding it the matrix A you feed it a function which
 calculates A x where x is some vector. Stops when rms residual < RMSResidual or when maximum iterates is reached.
@@ -21,12 +21,12 @@ Takes less memory with OkToModify_b = true, and Preconditioner = NULL. */
 float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), float *b, int n, bool OkToModify_b,
 	float *x, float RMSResidual, void *Pass, int MaximumIterates, void Preconditioner(float *Product, float *x, void *Pass)){
 	int iterate, i;
-
-	char* buffer = (char*)malloc(2*n*sizeof(float)+128);
+
+	char* buffer = (char*)malloc(2*n*sizeof(float)+128);
 	float *r = (float*)(buffer+64);
 	//Start r and x.
 	if(x == NULL){
-		x = new float[n];
+		x = new float[n];
 
 		memset(x, 0, sizeof(float)*n);		//Zero initial guess if x == NULL.
 		memcpy(r, b, sizeof(float)*n);
@@ -35,25 +35,25 @@ float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), fl
 #ifdef _OPENMP
 #pragma omp parallel for           // removed schedule(dynamic,10)
 #endif
-		for(int ii = 0; ii < n; ii++)
+		for(int ii = 0; ii < n; ii++)
 			r[ii] = b[ii] - r[ii];		//r = b - A x.
 	}
 	//s is preconditionment of r. Without, direct to r.
 	float *s = r, rs = 0.0f;
 	if(Preconditioner != NULL){
-		s = new float[n];
+		s = new float[n];
 
 		Preconditioner(s, r, Pass);
-	}
+	}
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+:rs)  // removed schedule(dynamic,10)
 #endif
 	for(int ii = 0; ii < n; ii++) {
-		rs += r[ii]*s[ii];
+		rs += r[ii]*s[ii];
 		}
 	//Search direction d.
-	float *d = (float*)(buffer + n*sizeof(float) + 128);
-	
+	float *d = (float*)(buffer + n*sizeof(float) + 128);
+	
 	memcpy(d, s, sizeof(float)*n);
 
 	//Store calculations of Ax in this.
@@ -61,7 +61,7 @@ float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), fl
 	if(!OkToModify_b) ax = new float[n];
 
 	//Start iterating!
-	if(MaximumIterates == 0) MaximumIterates = n;
+	if(MaximumIterates == 0) MaximumIterates = n;
 	for(iterate = 0; iterate < MaximumIterates; iterate++){
 		//Get step size alpha, store ax while at it.
 		float ab = 0.0f;
@@ -69,7 +69,7 @@ float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), fl
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+:ab)
 #endif
-		for(int ii = 0; ii < n; ii++)
+		for(int ii = 0; ii < n; ii++)
 			ab += d[ii]*ax[ii];
 
 		if(ab == 0.0f) break;	//So unlikely. It means perfectly converged or singular, stop either way.
@@ -93,90 +93,90 @@ float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), fl
 
 		//Get beta.
 		ab = rs;
-		rs = 0.0f;
-
+		rs = 0.0f;
+
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel
 #endif
-{
-		float c = 0.0f;
-		float t;
+{
+		float c = 0.0f;
+		float t;
 		float temp;
 #ifdef _OPENMP
-#pragma omp for reduction(+:rs)                            // Summation with error correction
+#pragma omp for reduction(+:rs)                            // Summation with error correction
 #endif
-		for(int ii = 0; ii < n; ii++) {
-			temp = r[ii]*s[ii];
-			t = rs + temp;
-			if( fabsf(rs) >= fabsf(temp) )
-				c += ((rs-t) + temp);
-			else
-				c += ((temp-t)+rs);
-			rs = t;
-		}
+		for(int ii = 0; ii < n; ii++) {
+			temp = r[ii]*s[ii];
+			t = rs + temp;
+			if( fabsf(rs) >= fabsf(temp) )
+				c += ((rs-t) + temp);
+			else
+				c += ((temp-t)+rs);
+			rs = t;
+		}
 #ifdef _OPENMP
-#pragma omp critical
+#pragma omp critical
 #endif
-		rs += c;
-}
-
+		rs += c;
+}
+
 		ab = rs/ab;
 
-		//Update search direction p.
+		//Update search direction d.
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
-		for(int ii = 0; ii < n; ii++)
+		for(int ii = 0; ii < n; ii++)
 			d[ii] = s[ii] + ab*d[ii];
-
-
+
+
 	}
-
+
 	if(iterate == MaximumIterates)
 		if(iterate != n && RMSResidual != 0.0f)
 			printf("Warning: MaximumIterates (%u) reached in SparseConjugateGradient.\n", MaximumIterates);
 
 	if(ax != b) delete[] ax;
 	if(s != r) delete[] s;
-	free(buffer);
+	free(buffer);
 	return x;
 }
 
 MultiDiagonalSymmetricMatrix::MultiDiagonalSymmetricMatrix(int Dimension, int NumberOfDiagonalsInLowerTriangle){
 	n = Dimension;
-	m = NumberOfDiagonalsInLowerTriangle;
+	m = NumberOfDiagonalsInLowerTriangle;
 	IncompleteCholeskyFactorization = NULL;
 
 	Diagonals = new float *[m];
 	StartRows = new int [m+1];
 	memset(Diagonals, 0, sizeof(float *)*m);
-	memset(StartRows, 0, sizeof(int)*(m+1));
+	memset(StartRows, 0, sizeof(int)*(m+1));
 	StartRows[m] = n+1;
 }
 
 MultiDiagonalSymmetricMatrix::~MultiDiagonalSymmetricMatrix(){
-	if(DiagBuffer != NULL)
-		free(buffer);
-	else
-		for(int i=0;i<m;i++)
-			delete[] Diagonals[i];
-	
+	if(DiagBuffer != NULL)
+		free(buffer);
+	else
+		for(int i=0;i<m;i++)
+			delete[] Diagonals[i];
+	
 	delete[] Diagonals;
 	delete[] StartRows;
 }
 
-bool MultiDiagonalSymmetricMatrix::CreateDiagonal(int index, int StartRow){
-	// Changed memory allocation for diagonals to avoid L1 conflict misses
-	// Falls back to original version if big block could not be allocated
-	int padding = 4096 - ((n*m*sizeof(float)) % 4096);
-	if(index == 0){
-		buffer = (char*)calloc( (n+padding) * m * sizeof(float)+ (m+16)*64 + 63,1);
-		if(buffer == NULL)
-			// no big memory block available => try to allocate smaller blocks
-			DiagBuffer = NULL;
-		else {
-			DiagBuffer = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
-		}
+bool MultiDiagonalSymmetricMatrix::CreateDiagonal(int index, int StartRow){
+	// Changed memory allocation for diagonals to avoid L1 conflict misses
+	// Falls back to original version if big block could not be allocated
+	int padding = 4096 - ((n*m*sizeof(float)) % 4096);
+	if(index == 0){
+		buffer = (char*)calloc( (n+padding) * m * sizeof(float)+ (m+16)*64 + 63,1);
+		if(buffer == NULL)
+			// no big memory block available => try to allocate smaller blocks
+			DiagBuffer = NULL;
+		else {
+			DiagBuffer = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
+		}
 	}
 	if(index >= m){
 		printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: invalid index.\n");
@@ -187,32 +187,32 @@ bool MultiDiagonalSymmetricMatrix::CreateDiagonal(int index, int StartRow){
 			printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: each StartRow must exceed the previous.\n");
 			return false;
 		}
-
+
 	if(DiagBuffer != NULL)
-		Diagonals[index] = (float*)(DiagBuffer+(index*(n+padding)*sizeof(float))+((index+16)*64));
-	else {
-		Diagonals[index] = new float[DiagonalLength(StartRow)];
-		if(Diagonals[index] == NULL) {
+		Diagonals[index] = (float*)(DiagBuffer+(index*(n+padding)*sizeof(float))+((index+16)*64));
+	else {
+		Diagonals[index] = new float[DiagonalLength(StartRow)];
+		if(Diagonals[index] == NULL) {
 			printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: memory allocation failed. Out of memory?\n");
-			return false;
+			return false;
 		}
 		memset(Diagonals[index], 0, sizeof(float)*DiagonalLength(StartRow));
-	}
-		
+	}
+		
 	StartRows[index] = StartRow;
 	return true;
 }
 
 inline int MultiDiagonalSymmetricMatrix::FindIndex(int StartRow) {
-	//There's GOT to be a better way to do this. "Bidirectional map?"
-	// Issue 1895 : Changed start of loop from zero to one
+	//There's GOT to be a better way to do this. "Bidirectional map?"
+	// Issue 1895 : Changed start of loop from zero to one
 	// m is small (5 or 6)
 	for(int i = 1; i < m; i++)
 		if(StartRows[i] == StartRow)
 			return i;
 	return -1;
 }
-
+
 bool MultiDiagonalSymmetricMatrix::LazySetEntry(float value, int row, int column){
 	//On the strict upper triangle? Swap, this is ok due to symmetry.
 	int i, sr;
@@ -232,90 +232,90 @@ bool MultiDiagonalSymmetricMatrix::LazySetEntry(float value, int row, int column
 }
 
 SSEFUNCTION void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* RESTRICT x){
-
-	int srm = StartRows[m-1];
-	int lm = DiagonalLength(srm);
-#ifdef _OPENMP
-#ifdef __SSE2__
-	const int chunkSize = (lm-srm)/(omp_get_num_procs()*32);
-#else
-	const int chunkSize = (lm-srm)/(omp_get_num_procs()*8);
-#endif
-#endif
-#pragma omp parallel
-{
-	// First fill the big part in the middle
-	// This can be done without intermediate stores to memory and it can be parallelized too
-#ifdef _OPENMP
-#pragma omp for schedule(dynamic,chunkSize) nowait
-#endif
-#ifdef __SSE2__
-	for(int j=srm;j<lm-3;j+=4) {
-		__m128 prodv = LVFU(Diagonals[0][j]) * LVFU(x[j]);
-		for(int i=m-1;i>0;i--) {
-			int s = StartRows[i];
-			prodv += (LVFU(Diagonals[i][j - s])*LVFU(x[j - s])) + (LVFU(Diagonals[i][j])*LVFU(x[j + s]));
-		}
-		_mm_storeu_ps(&Product[j],prodv);
-	}
-#else
-	for(int j=srm;j<lm;j++) {
-		float prod = Diagonals[0][j]*x[j];
-		for(int i=m-1;i>0;i--) {
-			int s = StartRows[i];
-			prod += (Diagonals[i][j - s]*x[j - s]) + (Diagonals[i][j]*x[j + s]);
-		}
-		Product[j] = prod;
-	}
-
-#endif
-#pragma omp single
-{
-#ifdef __SSE2__
-		for(int j=lm-((lm-srm)%4);j<lm;j++) {
-		float prod = Diagonals[0][j]*x[j];
-		for(int i=m-1;i>0;i--) {
-			int s = StartRows[i];
-			prod += (Diagonals[i][j - s]*x[j - s]) + (Diagonals[i][j]*x[j + s]);
-		}
-		Product[j] = prod;
-	}
-#endif
-	// Fill remaining area
+
+	int srm = StartRows[m-1];
+	int lm = DiagonalLength(srm);
+#ifdef _OPENMP
+#ifdef __SSE2__
+	const int chunkSize = (lm-srm)/(omp_get_num_procs()*32);
+#else
+	const int chunkSize = (lm-srm)/(omp_get_num_procs()*8);
+#endif
+#endif
+#pragma omp parallel
+{
+	// First fill the big part in the middle
+	// This can be done without intermediate stores to memory and it can be parallelized too
+#ifdef _OPENMP
+#pragma omp for schedule(dynamic,chunkSize) nowait
+#endif
+#ifdef __SSE2__
+	for(int j=srm;j<lm-3;j+=4) {
+		__m128 prodv = LVFU(Diagonals[0][j]) * LVFU(x[j]);
+		for(int i=m-1;i>0;i--) {
+			int s = StartRows[i];
+			prodv += (LVFU(Diagonals[i][j - s])*LVFU(x[j - s])) + (LVFU(Diagonals[i][j])*LVFU(x[j + s]));
+		}
+		_mm_storeu_ps(&Product[j],prodv);
+	}
+#else
+	for(int j=srm;j<lm;j++) {
+		float prod = Diagonals[0][j]*x[j];
+		for(int i=m-1;i>0;i--) {
+			int s = StartRows[i];
+			prod += (Diagonals[i][j - s]*x[j - s]) + (Diagonals[i][j]*x[j + s]);
+		}
+		Product[j] = prod;
+	}
+
+#endif
+#pragma omp single
+{
+#ifdef __SSE2__
+		for(int j=lm-((lm-srm)%4);j<lm;j++) {
+		float prod = Diagonals[0][j]*x[j];
+		for(int i=m-1;i>0;i--) {
+			int s = StartRows[i];
+			prod += (Diagonals[i][j - s]*x[j - s]) + (Diagonals[i][j]*x[j + s]);
+		}
+		Product[j] = prod;
+	}
+#endif
+	// Fill remaining area
 	// Loop over the stored diagonals.
 	for(int i = 0; i < m; i++){
 		int sr = StartRows[i];
 		float *a = Diagonals[i];	//One fewer dereference.
-		int l = DiagonalLength(sr);
+		int l = DiagonalLength(sr);
 		if(sr == 0) {
 			for(int j = 0; j < srm; j++)
 				Product[j] = a[j]*x[j];		//Separate, fairly simple treatment for the main diagonal.
 			for(int j = lm; j < l; j++)
 				Product[j] = a[j]*x[j];		//Separate, fairly simple treatment for the main diagonal.
 		} else {
-// Split the loop in 3 parts, so now the big one in the middle can be parallelized without race conditions
-			// updates 0 to sr - 1. Because sr is small (in the range of image-width) no benefit by omp
-			for(int j=0;j<sr;j++) {
-				Product[j] += a[j]*x[j + sr];		//Contribution from upper triangle
-			}
-			// Updates sr to l - 1. Because sr is small and l is big, this loop is parallelized
-			for(int j = sr; j < srm; j++) {
+// Split the loop in 3 parts, so now the big one in the middle can be parallelized without race conditions
+			// updates 0 to sr - 1. Because sr is small (in the range of image-width) no benefit by omp
+			for(int j=0;j<sr;j++) {
+				Product[j] += a[j]*x[j + sr];		//Contribution from upper triangle
+			}
+			// Updates sr to srm - 1. Rows srm to lm - 1 were already filled above, so this loop runs serially
+			for(int j = sr; j < srm; j++) {
 				Product[j] += a[j - sr]*x[j - sr] + a[j]*x[j + sr];		//Contribution from lower and upper triangle
-			}
-			for(int j = lm; j < l; j++) {
+			}
+			for(int j = lm; j < l; j++) {
 				Product[j] += a[j - sr]*x[j - sr] + a[j]*x[j + sr];		//Contribution from lower and upper triangle
-			}
+			}
 			// Updates l to l + sr - 1. Because sr is small (in the range of image-width) no benefit by omp
 			for(int j = l; j < l + sr; j++) {
 				Product[j] += a[j-sr]*x[j - sr];	//Contribution from lower triangle
-			}
-		}
-	}
-}
-}
+			}
+		}
+	}
+}
+}
 }
 
-bool MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization(int MaxFillAbove){
+bool MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization(int MaxFillAbove){
 	if(m == 1){
 		printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: just one diagonal? Can you divide?\n");
 		return false;
@@ -333,15 +333,15 @@ bool MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization(int Max
 		fp = rtengine::min(StartRows[ii] - StartRows[ii - 1], MaxFillAbove);	//Guarunteed positive since StartRows must be created in increasing order.
 		mic=mic+fp;
 		}
-	//Initialize the decomposition - setup memory, start rows, etc.
-
+	//Initialize the decomposition - setup memory, start rows, etc.
+
 	MultiDiagonalSymmetricMatrix *ic = new MultiDiagonalSymmetricMatrix(n, mic);
 	ic->CreateDiagonal(0, 0);	//There's always a main diagonal in this type of decomposition.
-	mic=1;
+	mic=1;
 	for(int ii = 1; ii < m; ii++){
 		//Set j to the number of diagonals to be created corresponding to a diagonal on this source matrix...
-		j = rtengine::min(StartRows[ii] - StartRows[ii - 1], MaxFillAbove);
-
+		j = rtengine::min(StartRows[ii] - StartRows[ii - 1], MaxFillAbove);
+
 		//...and create those diagonals. I want to take a moment to tell you about how much I love minimalistic loops: very much.
 		while(j-- != 0)
 			if(!ic->CreateDiagonal(mic++, StartRows[ii] - j)){
@@ -350,101 +350,101 @@ bool MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization(int Max
 				delete ic;
 				return false;
 			}
-	}
+	}
 
 	//It's all initialized? Uhkay. Do the actual math then.
 	int sss, ss, s;
 	int k, MaxStartRow = StartRows[m - 1];	//Handy number.
 	float **l = ic->Diagonals;
-	float  *d = ic->Diagonals[0];		//Describes D in LDLt.
-	int icm = ic->m;
-	int icn = ic->n;
-	int* RESTRICT icStartRows = ic->StartRows;
-
-	//Loop over the columns.
-
-	// create array for quicker access to ic->StartRows
-	struct s_diagmap {
-		int sss;
-		int ss;
-		int k;
-	};
-
-
-	// Pass one: count number of needed entries
-	int	entrycount = 0;
-	for(int i=1;i<icm;i++) {
-		for(int j=1;j<icm;j++) {
-			if(ic->FindIndex( icStartRows[i] + icStartRows[j]) > 0)
-				entrycount ++;
-		}
-	}
-
-	// now we can create the array
-	struct s_diagmap* RESTRICT DiagMap = new s_diagmap[entrycount];
-	// we also need the maxvalues
-	int	entrynumber = 0;
-	int index;
-	int* RESTRICT MaxIndizes = new int[icm];
-
-	for(int i=1;i<icm;i++) {
-		for(int j=1;j<icm;j++){
-			index = ic->FindIndex( icStartRows[i] + icStartRows[j]);
-			if(index > 0) {
-				DiagMap[entrynumber].ss = j;
-				DiagMap[entrynumber].sss = index;
-				DiagMap[entrynumber].k = icStartRows[j];
-				entrynumber ++;
-			}
-		}
-		MaxIndizes[i] = entrynumber - 1;
-	}
-
-	int* RESTRICT findmap = new int[icm];
-		for(int j=0;j<icm;j++)
-			findmap[j] = FindIndex( icStartRows[j]);
+	float  *d = ic->Diagonals[0];		//Describes D in LDLt.
+	int icm = ic->m;
+	int icn = ic->n;
+	int* RESTRICT icStartRows = ic->StartRows;
+
+	//Loop over the columns.
+
+	// create array for quicker access to ic->StartRows
+	struct s_diagmap {
+		int sss;
+		int ss;
+		int k;
+	};
+
+
+	// Pass one: count number of needed entries
+	int	entrycount = 0;
+	for(int i=1;i<icm;i++) {
+		for(int j=1;j<icm;j++) {
+			if(ic->FindIndex( icStartRows[i] + icStartRows[j]) > 0)
+				entrycount ++;
+		}
+	}
+
+	// now we can create the array
+	struct s_diagmap* RESTRICT DiagMap = new s_diagmap[entrycount];
+	// we also need the maxvalues
+	int	entrynumber = 0;
+	int index;
+	int* RESTRICT MaxIndizes = new int[icm];
+
+	for(int i=1;i<icm;i++) {
+		for(int j=1;j<icm;j++){
+			index = ic->FindIndex( icStartRows[i] + icStartRows[j]);
+			if(index > 0) {
+				DiagMap[entrynumber].ss = j;
+				DiagMap[entrynumber].sss = index;
+				DiagMap[entrynumber].k = icStartRows[j];
+				entrynumber ++;
+			}
+		}
+		MaxIndizes[i] = entrynumber - 1;
+	}
+
+	int* RESTRICT findmap = new int[icm];
+		for(int j=0;j<icm;j++)
+			findmap[j] = FindIndex( icStartRows[j]);
 	
 	for(j = 0; j < n; j++){
-		//Calculate d for this column.
+		//Calculate d for this column.
 		d[j] = Diagonals[0][j];
 
 		//This is a loop over k from 1 to j, inclusive. We'll cover that by looping over the index of the diagonals (s), and get k from it.
-		//The first diagonal is d (k = 0), so skip that and have s start at 1. Cover all available s but stop if k exceeds j.
-		s=1;
-		k=icStartRows[s];
+		//The first diagonal is d (k = 0), so skip that and have s start at 1. Cover all available s but stop if k exceeds j.
+		s=1;
+		k=icStartRows[s];
 		while(k<=j) {
-			d[j] -= l[s][j - k]*l[s][j - k]*d[j - k];
-			s++;
+			d[j] -= l[s][j - k]*l[s][j - k]*d[j - k];
+			s++;
 			k=icStartRows[s];
-		}
+		}
 		if(UNLIKELY(d[j] == 0.0f)){
 			printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: division by zero. Matrix not decomposable.\n");
-			delete ic;
-			delete[] DiagMap;
-			delete[] MaxIndizes;
+			delete ic;
+			delete[] DiagMap;
+			delete[] MaxIndizes;
 			delete[] findmap;
 			return false;
 		}
-		float id = 1.0f/d[j];
-		//Now, calculate l from top down along this column.
-
-		int mapindex = 0;
-		int jMax = icn - j;
+		float id = 1.0f/d[j];
+		//Now, calculate l from top down along this column.
+
+		int mapindex = 0;
+		int jMax = icn - j;
 		for(s = 1; s < icm; s++){
-			if(icStartRows[s] >= jMax)
-				break; //Possible values of j are limited
-				
-			float temp = 0.0f;
-			while(mapindex <= MaxIndizes[s] && ( k = DiagMap[mapindex].k) <= j) {
-				temp -= l[DiagMap[mapindex].sss][j - k]*l[DiagMap[mapindex].ss][j - k]*d[j - k];
-				mapindex ++;
-			}
-			sss = findmap[s];
+			if(icStartRows[s] >= jMax)
+				break; //Possible values of j are limited
+				
+			float temp = 0.0f;
+			while(mapindex <= MaxIndizes[s] && ( k = DiagMap[mapindex].k) <= j) {
+				temp -= l[DiagMap[mapindex].sss][j - k]*l[DiagMap[mapindex].ss][j - k]*d[j - k];
+				mapindex ++;
+			}
+			sss = findmap[s];
 			l[s][j] = id * (sss < 0 ? temp : (Diagonals[sss][j] + temp));
-		}
-	}
-	delete[] DiagMap;
-	delete[] MaxIndizes;
+		}
+	}
+	delete[] DiagMap;
+	delete[] MaxIndizes;
 	delete[] findmap;
 	IncompleteCholeskyFactorization = ic;
 	return true;
@@ -454,86 +454,86 @@ void MultiDiagonalSymmetricMatrix::KillIncompleteCholeskyFactorization(void){
 	delete IncompleteCholeskyFactorization;
 }
 
-void MultiDiagonalSymmetricMatrix::CholeskyBackSolve(float* RESTRICT x, float* RESTRICT b){
+void MultiDiagonalSymmetricMatrix::CholeskyBackSolve(float* RESTRICT x, float* RESTRICT b){
 	//We want to solve L D Lt x = b where D is a diagonal matrix described by Diagonals[0] and L is a unit lower triagular matrix described by the rest of the diagonals.
 	//Let D Lt x = y. Then, first solve L y = b.
 	float* RESTRICT  *d = IncompleteCholeskyFactorization->Diagonals;
 	int* RESTRICT s = IncompleteCholeskyFactorization->StartRows;
 	int M = IncompleteCholeskyFactorization->m, N = IncompleteCholeskyFactorization->n;
-	int i, j;
-	
+	int i, j;
+	
 	if(M != DIAGONALSP1){					// can happen in theory
 		for(j = 0; j < N; j++){
-			float sub = b[j];					// using local var to reduce memory writes, gave a big speedup
-			i = 1;
-			int c = j - s[i];
+			float sub = b[j];					// using local var to reduce memory writes, gave a big speedup
+			i = 1;
+			int c = j - s[i];
 			while(c >= 0) {
 				sub -= d[i][c]*x[c];
-				i++;
+				i++;
 				c = j - s[i];
 			}
 			x[j] = sub;							// only one memory-write per j
-		}
-	} else {							   // that's the case almost every time
+		}
+	} else {							   // that's the case almost every time
 		for(j = 0; j <= s[M-1]; j++){
-			float sub = b[j];					// using local var to reduce memory writes, gave a big speedup
-			i = 1;
-			int c = j - s[1];
+			float sub = b[j];					// using local var to reduce memory writes, gave a big speedup
+			i = 1;
+			int c = j - s[1];
 			while(c >= 0) {
 				sub -= d[i][c]*x[c];
-				i++;
+				i++;
 				c = j - s[i];
 			}
 			x[j] = sub;							// only one memory-write per j
 		}
 		for(j = s[M-1]+1; j<N; j++){
-			float sub = b[j];              		// using local var to reduce memory writes, gave a big speedup
-			for(int i=DIAGONALSP1-1;i>0;i--){		// using a constant upperbound allows the compiler to unroll this loop (gives a good speedup)
+			float sub = b[j];              		// using local var to reduce memory writes, gave a big speedup
+			for(int i=DIAGONALSP1-1;i>0;i--){		// using a constant upperbound allows the compiler to unroll this loop (gives a good speedup)
 				sub -= d[i][j-s[i]]*x[j-s[i]];
-			}
+			}
 			x[j] = sub;							// only one memory-write per j
-		}
+		}
 	}
-
+
 	//Now, solve x from D Lt x = y -> Lt x = D^-1 y
 // Took this one out of the while, so it can be parallelized now, which speeds up, because division is expensive
 #ifdef _OPENMP
 #pragma omp parallel for
-#endif
+#endif
     for(j = 0; j < N; j++)
 		x[j] = x[j]/d[0][j];
-
+
 	if(M != DIAGONALSP1){					// can happen in theory
 		while(j-- > 0){
-			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
-			i=1;
-			int c = j+s[1];
-			while(c < N) {
-				sub -= d[i][j]*x[c];
-				i++;
+			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
+			i=1;
+			int c = j+s[1];
+			while(c < N) {
+				sub -= d[i][j]*x[c];
+				i++;
 				c = j+s[i];
-			}
+			}
 			x[j] = sub;							// only one memory-write per j
-		}
-	} else {								// that's the case almost every time
-		for(j=N-1;j>=(N-1)-s[M-1];j--) {
-			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
-			i=1;
-			int c = j+s[1];
-			while(c < N) {
-				sub -= d[i][j]*x[j+s[i]];
-				i++;
+		}
+	} else {								// that's the case almost every time
+		for(j=N-1;j>=(N-1)-s[M-1];j--) {
+			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
+			i=1;
+			int c = j+s[1];
+			while(c < N) {
+				sub -= d[i][j]*x[j+s[i]];
+				i++;
 				c = j+s[i];
-			}
+			}
 			x[j] = sub;							// only one memory-write per j
-		}
-		for(j=(N-2)-s[M-1];j>=0;j--) {
-			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
-			for(int i=DIAGONALSP1-1;i>0;i--){		// using a constant upperbound allows the compiler to unroll this loop (gives a good speedup)
-				sub -= d[i][j]*x[j + s[i]];
-			}
+		}
+		for(j=(N-2)-s[M-1];j>=0;j--) {
+			float sub = x[j];					// using local var to reduce memory writes, gave a big speedup
+			for(int i=DIAGONALSP1-1;i>0;i--){		// using a constant upperbound allows the compiler to unroll this loop (gives a good speedup)
+				sub -= d[i][j]*x[j + s[i]];
+			}
 			x[j] = sub;							// only one memory-write per j
-		}
+		}
 	}
 }
 
@@ -566,19 +566,19 @@ EdgePreservingDecomposition::~EdgePreservingDecomposition(){
 	delete A;
 }
 
-SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop){
-
+SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop){
+
 	if(Blur == NULL)
 		UseBlurForEdgeStop = false,	//Use source if there's no supplied Blur.
-		Blur = new float[n];
-
+		Blur = new float[n];
+
 	if(Scale == 0.0f){
 		memcpy(Blur, Source, n*sizeof(float));
 		return Blur;
 	}
 
 	//Create the edge stopping function a, rotationally symmetric and just one instead of (ax, ay). Maybe don't need Blur yet, so use its memory.
-	float* RESTRICT a;
+	float* RESTRICT a;
 	float* RESTRICT g;
 	if(UseBlurForEdgeStop) a = new float[n], g = Blur;
 	else a = Blur, g = Source;
@@ -586,41 +586,41 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
 	int i;
 	int w1 = w - 1, h1 = h - 1;
 //	float eps = 0.02f;
-	const float sqreps = 0.0004f;                           // removed eps*eps from inner loop
-
-
-#ifdef _OPENMP
-#pragma omp parallel
-#endif
-{
-#ifdef __SSE2__
-	int x;
-	__m128 gxv,gyv;
-	__m128 Scalev = _mm_set1_ps( Scale );
-	__m128 sqrepsv = _mm_set1_ps( sqreps );
-	__m128 EdgeStoppingv = _mm_set1_ps( -EdgeStopping );
-	__m128 zd5v = _mm_set1_ps( 0.5f );
-#endif
+	const float sqreps = 0.0004f;                           // removed eps*eps from inner loop
+
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+{
+#ifdef __SSE2__
+	int x;
+	__m128 gxv,gyv;
+	__m128 Scalev = _mm_set1_ps( Scale );
+	__m128 sqrepsv = _mm_set1_ps( sqreps );
+	__m128 EdgeStoppingv = _mm_set1_ps( -EdgeStopping );
+	__m128 zd5v = _mm_set1_ps( 0.5f );
+#endif
 #ifdef _OPENMP
 #pragma omp for
-#endif
+#endif
 	for(int y = 0; y < h1; y++){
-		float *rg = &g[w*y];
-#ifdef __SSE2__
+		float *rg = &g[w*y];
+#ifdef __SSE2__
 		for(x = 0; x < w1-3; x+=4){
-			//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
-			gxv = (LVFU(rg[x + 1]) -  LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + w]));
+			//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
+			gxv = (LVFU(rg[x + 1]) -  LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + w]));
 			gyv = (LVFU(rg[x + w]) -  LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + 1]));
 			//Apply power to the magnitude of the gradient to get the edge stopping function.
 			_mm_storeu_ps( &a[x + w*y], Scalev * pow_F((zd5v*_mm_sqrt_ps(gxv*gxv + gyv*gyv + sqrepsv)), EdgeStoppingv) );
-		}
+		}
 		for(; x < w1; x++){
 			//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
 			float gx = (rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w]);
 			float gy = (rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1]);
 			//Apply power to the magnitude of the gradient to get the edge stopping function.
 			a[x + w*y] = Scale*pow_F(0.5f*sqrtf(gx*gx + gy*gy + sqreps), -EdgeStopping);
-		}
+		}
 #else
 		for(int x = 0; x < w1; x++){
 			//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
@@ -629,12 +629,12 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
 
 			//Apply power to the magnitude of the gradient to get the edge stopping function.
 			a[x + w*y] = Scale*pow_F(0.5f*sqrtf(gx*gx + gy*gy + sqreps), -EdgeStopping);
-		}
+		}
 #endif
-	}
-}
-
-
+	}
+}
+
+
 	/* Now setup the linear problem. I use the Maxima CAS, here's code for making an FEM formulation for the smoothness term:
 		p(x, y) := (1 - x)*(1 - y);
 		P(m, n) := A[m][n]*p(x, y) + A[m + 1][n]*p(1 - x, y) + A[m + 1][n + 1]*p(1 - x, 1 - y) + A[m][n + 1]*p(x, 1 - y);
@@ -644,13 +644,13 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
 		Integrate(diff(P(u - 1, v), x)*diff(p(1 - x, y), x) + diff(P(u - 1, v), y)*diff(p(1 - x, y), y));
 		Integrate(diff(P(u - 1, v - 1), x)*diff(p(1 - x, 1 - y), x) + diff(P(u - 1, v - 1), y)*diff(p(1 - x, 1 - y), y));
 		Integrate(diff(P(u, v - 1), x)*diff(p(x, 1 - y), x) + diff(P(u, v - 1), y)*diff(p(x, 1 - y), y));
-	So yeah. Use the numeric results of that to fill the matrix A.*/
-
+	So yeah. Use the numeric results of that to fill the matrix A.*/
+
 	memset(a_1, 0, A->DiagonalLength(1)*sizeof(float));
 	memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float));
 	memset(a_w, 0, A->DiagonalLength(w)*sizeof(float));
-	memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
-
+	memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
+
 
 // checked for race condition here
 // a0[] is read and write but adressed by i only
@@ -659,10 +659,10 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
 // a_w is write only
 // a_w1 is write only
 // a_1 is write only
-// So, there should be no race conditions
-
-#ifdef _OPENMP
-#pragma omp parallel for
+// So, there should be no race conditions
+
+#ifdef _OPENMP
+#pragma omp parallel for
 #endif
 	for(int y = 0; y < h; y++){
         int i = y*w;
@@ -673,31 +673,31 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
 			//Remember, only fill the lower triangle. Memory for upper is never made. It's symmetric. Trust.
 			if(x > 0 && y > 0) {						
 				ac = a[i - w - 1]/6.0f;
-				a_w_1[i - w - 1] -= 2.0f*ac;
+				a_w_1[i - w - 1] -= 2.0f*ac;
 				a_w[i - w] -= ac;
-				a_1[i - 1] -= ac;
+				a_1[i - 1] -= ac;
 				a0temp += ac;
 			}
 			if(x < w1 && y > 0) {
 				ac = a[i - w]/6.0f;
-				a_w[i - w] -= ac;
+				a_w[i - w] -= ac;
 				a_w1[i - w + 1] -= 2.0f*ac;
 				a0temp += ac;
 			}
 			if(x > 0 && y < h1) {
 				ac = a[i - 1]/6.0f;
-				a_1[i - 1] -= ac;
+				a_1[i - 1] -= ac;
 				a0temp += ac;
 			}
 			if(x < w1 && y < h1)
-				a0temp += a[i]/6.0f;
-			a0[i] = 4.0f*a0temp;
+				a0temp += a[i]/6.0f;
+			a0[i] = 4.0f*a0temp;
 		}
-	}
-
+	}
+
   if(UseBlurForEdgeStop) delete[] a;
-  //Solve & return.
-  bool success=A->CreateIncompleteCholeskyFactorization(1); //Fill-in of 1 seems to work really good. More doesn't really help and less hurts (slightly).
+  //Solve & return.
+  bool success=A->CreateIncompleteCholeskyFactorization(1); //Fill-in of 1 seems to work really good. More doesn't really help and less hurts (slightly).
   if(!success) {
     fprintf(stderr,"Error: Tonemapping has failed.\n");
     memset(Blur, 0, sizeof(float)*n);  // On failure, set the blur to zero.  This is subsequently exponentiated in CompressDynamicRange.
@@ -705,7 +705,7 @@ SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float
   }
   if(!UseBlurForEdgeStop) memcpy(Blur, Source, n*sizeof(float));
   SparseConjugateGradient(A->PassThroughVectorProduct, Source, n, false, Blur, 0.0f, (void *)A, Iterates, A->PassThroughCholeskyBackSolve);
-  A->KillIncompleteCholeskyFactorization();
+  A->KillIncompleteCholeskyFactorization();
   return Blur;
 }
 
@@ -725,35 +725,35 @@ float *EdgePreservingDecomposition::CreateIteratedBlur(float *Source, float Scal
 	return Blur;
 }
 
-SSEFUNCTION float *EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings, float *Compressed){
-	if(w<300 && h<300) // set number of Reweightings to zero for small images (thumbnails). We could try to find a better solution here.
-		Reweightings = 0;
-		
-	//Small number intended to prevent division by zero. This is different from the eps in CreateBlur.
+SSEFUNCTION float *EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings, float *Compressed){
+	if(w<300 && h<300) // set number of Reweightings to zero for small images (thumbnails). We could try to find a better solution here.
+		Reweightings = 0;
+		
+	//Small number intended to prevent division by zero. This is different from the eps in CreateBlur.
 	const float eps = 0.0001f;
 
 	//We're working with luminance, which does better logarithmic.
-#ifdef __SSE2__
+#ifdef __SSE2__
 #ifdef _OPENMP
 #pragma omp parallel
-#endif
-{
-	__m128 epsv = _mm_set1_ps( eps );
+#endif
+{
+	__m128 epsv = _mm_set1_ps( eps );
 #ifdef _OPENMP
 #pragma omp for
-#endif
+#endif
 	for(int ii = 0; ii < n-3; ii+=4)
-		_mm_storeu_ps( &Source[ii], xlogf(LVFU(Source[ii]) + epsv));
-}
+		_mm_storeu_ps( &Source[ii], xlogf(LVFU(Source[ii]) + epsv));
+}
 	for(int ii = n-(n%4); ii < n; ii++)
-		Source[ii] = xlogf(Source[ii] + eps);
-
+		Source[ii] = xlogf(Source[ii] + eps);
+
 #else
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
 	for(int ii = 0; ii < n; ii++)
-		Source[ii] = xlogf(Source[ii] + eps);
+		Source[ii] = xlogf(Source[ii] + eps);
 #endif
 	
 	//Blur. Also setup memory for Compressed (we can just use u since each element of u is used in one calculation).
@@ -761,35 +761,40 @@ SSEFUNCTION float *EdgePreservingDecomposition::CompressDynamicRange(float *Sour
 	if(Compressed == NULL) Compressed = u;
 
 	//Apply compression, detail boost, unlogging. Compression is done on the logged data and detail boost on unlogged.
-	float temp = CompressionExponent - 1.0f;
-
-#ifdef __SSE2__
+	// temp multiplies u[i] inside the exponent of the compression loops below. For DetailBoost > 0 it is derived from DetailBoost alone (CompressionExponent is ignored); otherwise it is CompressionExponent - 1.0f.
+	float temp;
+	if(DetailBoost>0.f) {
+	float betemp=expf(-(2.f-DetailBoost+0.694f))-1.f;// 0.694 approximates ln(2) (~0.6931); betemp = exp(DetailBoost - 2.694) - 1
+	temp = 1.2f*xlogf( -betemp);	// -betemp = 1 - exp(DetailBoost - 2.694), which is positive only while DetailBoost < 2.694
+	}
+	else temp= CompressionExponent - 1.0f;
+#ifdef __SSE2__
 #ifdef _OPENMP
 #pragma omp parallel
-#endif
-{
-	__m128 cev, uev, sourcev;
-	__m128 epsv = _mm_set1_ps( eps );
-	__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
-	__m128 tempv = _mm_set1_ps( temp );
+#endif
+{
+	__m128 cev, uev, sourcev;
+	__m128 epsv = _mm_set1_ps( eps );
+	__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
+	__m128 tempv = _mm_set1_ps( temp );
 #ifdef _OPENMP
-#pragma omp for
-#endif
+#pragma omp for
+#endif
 	for(int i = 0; i < n-3; i+=4){
 		cev = xexpf(LVFU(Source[i]) + LVFU(u[i])*(tempv)) - epsv;
-		uev = xexpf(LVFU(u[i])) - epsv;
-		sourcev = xexpf(LVFU(Source[i])) - epsv;
-		_mm_storeu_ps( &Source[i], sourcev);
+		uev = xexpf(LVFU(u[i])) - epsv;
+		sourcev = xexpf(LVFU(Source[i])) - epsv;
+		_mm_storeu_ps( &Source[i], sourcev);
 		_mm_storeu_ps( &Compressed[i], cev + DetailBoostv * (sourcev - uev) );
-	}
-}
+	}
+}
 	for(int i=n-(n%4); i < n; i++){
 		float ce = xexpf(Source[i] + u[i]*(temp)) - eps;
 		float ue = xexpf(u[i]) - eps;
 		Source[i] = xexpf(Source[i]) - eps;
 		Compressed[i] = ce + DetailBoost*(Source[i] - ue);
-	}
-	
+	}
+	
 #else
 #ifdef _OPENMP
 #pragma omp parallel for
@@ -799,10 +804,10 @@ SSEFUNCTION float *EdgePreservingDecomposition::CompressDynamicRange(float *Sour
 		float ue = xexpf(u[i]) - eps;
 		Source[i] = xexpf(Source[i]) - eps;
 		Compressed[i] = ce + DetailBoost*(Source[i] - ue);
-	}
-#endif
+	}
+#endif
 
-	if(Compressed != u) delete[] u;
+	if(Compressed != u) delete[] u;
 	return Compressed;
 
 }
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index 387f26dc1..37110a83a 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -5092,6 +5092,7 @@ if(!params->epd.enabled) return;
 		float stren=params->epd.strength;
 		float edgest=params->epd.edgeStopping;
 		float sca=params->epd.scale;
+		float gamm=params->epd.gamma;
 		float rew=params->epd.reweightingIterates;
 		unsigned int i, N = Wid*Hei;
 		float Qpro= ( 4.0 / c_)  * ( a_w + 4.0 ) ;//estimate Q max if J=100.0
@@ -5106,7 +5107,7 @@ if(!params->epd.enabled) return;
 		#pragma omp parallel for
 		for (int i=0; i<Hei; i++)
 			for (int j=0; j<Wid; j++)
-				ncie->Q_p[i][j] = ncie->Q_p[i][j]/(Qpro);
+				ncie->Q_p[i][j] = gamm*ncie->Q_p[i][j]/(Qpro);
 
 		float Compression = expf(-stren);		//This modification turns numbers symmetric around 0 into exponents.
 		float DetailBoost = stren;
@@ -5125,7 +5126,7 @@ if(!params->epd.enabled) return;
 		#endif
 		for (int i=0; i<Hei; i++)
 			for (int j=0; j<Wid; j++) {
-			ncie->Q_p[i][j]=ncie->Q_p[i][j]*Qpro;
+			ncie->Q_p[i][j]=(ncie->Q_p[i][j]*Qpro)/gamm;
 			ncie->M_p[i][j]*=s;
 		}
 /*
@@ -5180,31 +5181,39 @@ if(!params->epd.enabled) return;
 float stren=params->epd.strength;
 float edgest=params->epd.edgeStopping;
 float sca=params->epd.scale;
+float gamm=params->epd.gamma;
 float rew=params->epd.reweightingIterates;
 	//Pointers to whole data and size of it.
 	float *L = lab->L[0];
 	float *a = lab->a[0];
 	float *b = lab->b[0];
 	unsigned int i, N = lab->W*lab->H;
-
 	EdgePreservingDecomposition epd = EdgePreservingDecomposition(lab->W, lab->H);
 
 	//Due to the taking of logarithms, L must be nonnegative. Further, scale to 0 to 1 using nominal range of L, 0 to 15 bit.
     float minL = FLT_MAX;
+	float maxL = 0.f;
 #pragma omp parallel
 {
 	float lminL = FLT_MAX;
+	float lmaxL = 0.f;
 #pragma omp for
-	for(i = 0; i < N; i++)
+	for(i = 0; i < N; i++) {
 		if(L[i] < lminL) lminL = L[i];
+		if(L[i] > lmaxL) lmaxL = L[i];	
+	}
 #pragma omp critical
     if(lminL < minL) minL = lminL;
+#pragma omp critical
+    if(lmaxL > maxL) maxL = lmaxL;	// merge the per-thread maximum; needs its own critical because the pragma above guards only the minL statement
 }
 	if(minL > 0.0f) minL = 0.0f;		//Disable the shift if there are no negative numbers. I wish there were just no negative numbers to begin with.
 #pragma omp parallel for
 	for(i = 0; i < N; i++)
-		L[i] = (L[i] - minL)/32767.0f;
-
+	// Normalise by the measured maximum maxL rather than the nominal 32767, then scale by gamm. NOTE(review): maxL stays 0 when no L[i] is positive, which makes this a division by zero - confirm whether such input can occur.
+	{L[i] = (L[i] - minL)/maxL;
+		L[i]*=gamm;
+	}
 	//Some interpretations.
 	float Compression = expf(-stren);		//This modification turns numbers symmetric around 0 into exponents.
 	float DetailBoost = stren;
@@ -5230,7 +5239,8 @@ fclose(f);*/
 	for(int ii = 0; ii < N; ii++)
 		a[ii] *= s,
 		b[ii] *= s,
-		L[ii] = L[ii]*32767.0f + minL;
+		// Invert the earlier normalisation of L: divide out the gamm multiplier, rescale by maxL and restore the minL shift.
+		L[ii] = L[ii]*maxL*(1.f/gamm) + minL;
 }
 
 
diff --git a/rtengine/procevents.h b/rtengine/procevents.h
index 9ca678e94..ec45a0130 100644
--- a/rtengine/procevents.h
+++ b/rtengine/procevents.h
@@ -384,6 +384,8 @@ enum ProcEvent {
 	EvWavlinkedg=356,
 	EvWavCHCurve=357,
 	EvPreProcessHotDeadThresh=358,
+	EvEPDgamma=359,	
+	
 	NUMOFEVENTS	
 };
 }
diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc
index 20d283b6c..e10a8ab49 100644
--- a/rtengine/procparams.cc
+++ b/rtengine/procparams.cc
@@ -845,7 +845,8 @@ void ProcParams::setDefaults () {
     dirpyrDenoise.setDefaults();
 
     epd.enabled = false;
-    epd.strength = 0.25;
+    epd.strength = 0.8;
+    epd.gamma = 1.0;
     epd.edgeStopping = 1.4;
     epd.scale = 1.0;
     epd.reweightingIterates = 0;
@@ -1427,6 +1428,7 @@ int ProcParams::save (Glib::ustring fname, Glib::ustring fname2, bool fnameAbsol
     //Save epd.
     if (!pedited || pedited->epd.enabled)             keyFile.set_boolean ("EPD", "Enabled", epd.enabled);
     if (!pedited || pedited->epd.strength)            keyFile.set_double  ("EPD", "Strength", epd.strength);
+    if (!pedited || pedited->epd.gamma)          	  keyFile.set_double  ("EPD", "Gamma", epd.gamma);
     if (!pedited || pedited->epd.edgeStopping)        keyFile.set_double  ("EPD", "EdgeStopping", epd.edgeStopping);
     if (!pedited || pedited->epd.scale)               keyFile.set_double  ("EPD", "Scale", epd.scale);
     if (!pedited || pedited->epd.reweightingIterates) keyFile.set_integer ("EPD", "ReweightingIterates", epd.reweightingIterates);
@@ -2228,6 +2230,7 @@ if (keyFile.has_group ("Directional Pyramid Denoising")) {//TODO: No longer an a
 if (keyFile.has_group ("EPD")) {
     if(keyFile.has_key("EPD", "Enabled"))             { epd.enabled = keyFile.get_boolean ("EPD", "Enabled"); if (pedited) pedited->epd.enabled = true; }
     if(keyFile.has_key("EPD", "Strength"))            { epd.strength = keyFile.get_double ("EPD", "Strength"); if (pedited) pedited->epd.strength = true; }
+    if(keyFile.has_key("EPD", "Gamma"))         	   { epd.gamma = keyFile.get_double ("EPD", "Gamma"); if (pedited) pedited->epd.gamma = true; }
     if(keyFile.has_key("EPD", "EdgeStopping"))        { epd.edgeStopping = keyFile.get_double ("EPD", "EdgeStopping"); if (pedited) pedited->epd.edgeStopping = true; }
     if(keyFile.has_key("EPD", "Scale"))               { epd.scale = keyFile.get_double ("EPD", "Scale"); if (pedited) pedited->epd.scale = true; }
     if(keyFile.has_key("EPD", "ReweightingIterates")) { epd.reweightingIterates = keyFile.get_integer ("EPD", "ReweightingIterates"); if (pedited) pedited->epd.reweightingIterates = true; }
@@ -2880,6 +2883,7 @@ bool ProcParams::operator== (const ProcParams& other) {
 		&& dirpyrDenoise.passes == other.dirpyrDenoise.passes
 		&& epd.enabled == other.epd.enabled
 		&& epd.strength == other.epd.strength
+		&& epd.gamma == other.epd.gamma
 		&& epd.edgeStopping == other.epd.edgeStopping
 		&& epd.scale == other.epd.scale
 		&& epd.reweightingIterates == other.epd.reweightingIterates
diff --git a/rtengine/procparams.h b/rtengine/procparams.h
index 21eb89f51..e5d273c57 100644
--- a/rtengine/procparams.h
+++ b/rtengine/procparams.h
@@ -588,6 +588,7 @@ class EPDParams{
     public:
         bool   enabled;
         double strength;
+        double gamma;
         double edgeStopping;
         double scale;
         int    reweightingIterates;
diff --git a/rtengine/refreshmap.cc b/rtengine/refreshmap.cc
index f5da261b4..fd9bfefdb 100644
--- a/rtengine/refreshmap.cc
+++ b/rtengine/refreshmap.cc
@@ -380,6 +380,8 @@ DIRPYREQUALIZER,   //EvWavedgedetectthr
 DIRPYREQUALIZER,   //EvWavedgedetectthr2
 DIRPYREQUALIZER,   //EvWavlinkedg
 DIRPYREQUALIZER,   //EvWavCHCurve
-DARKFRAME		   //EvPreProcessHotDeadThresh
+DARKFRAME,		   //EvPreProcessHotDeadThresh
+SHARPENING			//EvEPDgamma
+
 };
 
diff --git a/rtgui/epd.cc b/rtgui/epd.cc
index e9ebba139..f20a66c13 100644
--- a/rtgui/epd.cc
+++ b/rtgui/epd.cc
@@ -27,22 +27,26 @@ EdgePreservingDecompositionUI::EdgePreservingDecompositionUI () : FoldableToolPa
 
 	setEnabledTooltipMarkup(M("TP_EPD_TOOLTIP"));
 
-	strength = Gtk::manage(new Adjuster (M("TP_EPD_STRENGTH"), -2.0, 2.0, 0.01, 0.25));
+	strength = Gtk::manage(new Adjuster (M("TP_EPD_STRENGTH"), -1.0, 2.0, 0.01, 0.8));
+	gamma = Gtk::manage(new Adjuster (M("TP_EPD_GAMMA"), 0.8, 1.5, 0.01, 1.));
 	edgeStopping = Gtk::manage(new Adjuster (M("TP_EPD_EDGESTOPPING"), 0.1, 4.0, 0.01, 1.4));
 	scale = Gtk::manage(new Adjuster (M("TP_EPD_SCALE"), 0.1, 10.0, 0.01, 1.0));
 	reweightingIterates	= Gtk::manage(new Adjuster (M("TP_EPD_REWEIGHTINGITERATES"), 0, 9, 1, 0));
 
 	strength->setAdjusterListener(this);
+	gamma->setAdjusterListener(this);
 	edgeStopping->setAdjusterListener(this);
 	scale->setAdjusterListener(this);
 	reweightingIterates->setAdjusterListener(this);
 
 	strength->show();
+	gamma->show();
 	edgeStopping->show();
 	scale->show();
 	reweightingIterates->show();
 
 	pack_start(*strength);
+	pack_start(*gamma);
 	pack_start(*edgeStopping);
 	pack_start(*scale);
 	pack_start(*reweightingIterates);
@@ -53,6 +57,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
 
 	if(pedited){
 		strength->setEditedState(pedited->epd.strength ? Edited : UnEdited);
+		gamma->setEditedState(pedited->epd.gamma ? Edited : UnEdited);
 		edgeStopping->setEditedState(pedited->epd.edgeStopping ? Edited : UnEdited);
 		scale->setEditedState(pedited->epd.scale ? Edited : UnEdited);
 		reweightingIterates->setEditedState(pedited->epd.reweightingIterates ? Edited : UnEdited);
@@ -62,6 +67,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
 	setEnabled(pp->epd.enabled);
 
 	strength->setValue(pp->epd.strength);
+	gamma->setValue(pp->epd.gamma);
 	edgeStopping->setValue(pp->epd.edgeStopping);
 	scale->setValue(pp->epd.scale);
 	reweightingIterates->setValue(pp->epd.reweightingIterates);
@@ -71,6 +77,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
 
 void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited){
 	pp->epd.strength = strength->getValue();
+	pp->epd.gamma = gamma->getValue();
 	pp->epd.edgeStopping = edgeStopping->getValue();
 	pp->epd.scale = scale->getValue();
 	pp->epd.reweightingIterates = reweightingIterates->getValue();
@@ -78,6 +85,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
 	
 	if(pedited){
 		pedited->epd.strength = strength->getEditedState();
+		pedited->epd.gamma = gamma->getEditedState();
 		pedited->epd.edgeStopping = edgeStopping->getEditedState();
 		pedited->epd.scale = scale->getEditedState();
 		pedited->epd.reweightingIterates = reweightingIterates->getEditedState();
@@ -87,17 +95,20 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
 
 void EdgePreservingDecompositionUI::setDefaults(const ProcParams *defParams, const ParamsEdited *pedited){
 	strength->setDefault(defParams->epd.strength);
+	gamma->setDefault(defParams->epd.gamma);
 	edgeStopping->setDefault(defParams->epd.edgeStopping);
 	scale->setDefault(defParams->epd.scale);
 	reweightingIterates->setDefault(defParams->epd.reweightingIterates);
 
 	if(pedited){
 		strength->setDefaultEditedState(pedited->epd.strength ? Edited : UnEdited);
+		gamma->setDefaultEditedState(pedited->epd.gamma ? Edited : UnEdited);
 		edgeStopping->setDefaultEditedState(pedited->epd.edgeStopping ? Edited : UnEdited);
 		scale->setDefaultEditedState(pedited->epd.scale ? Edited : UnEdited);
 		reweightingIterates->setDefaultEditedState(pedited->epd.reweightingIterates ? Edited : UnEdited);
 	}else{
 		strength->setDefaultEditedState(Irrelevant);
+		gamma->setDefaultEditedState(Irrelevant);
 		edgeStopping->setDefaultEditedState(Irrelevant);
 		scale->setDefaultEditedState(Irrelevant);
 		reweightingIterates->setDefaultEditedState(Irrelevant);
@@ -108,6 +119,8 @@ void EdgePreservingDecompositionUI::adjusterChanged(Adjuster* a, double newval){
 	if(listener && getEnabled()){
 		if(a == strength)
 			listener->panelChanged(EvEPDStrength, Glib::ustring::format(std::setw(2), std::fixed, std::setprecision(2), a->getValue()));
+		else if(a == gamma)
+			listener->panelChanged(EvEPDgamma, Glib::ustring::format(std::setw(2), std::fixed, std::setprecision(2), a->getValue()));
 		else if(a == edgeStopping)
 			listener->panelChanged(EvEPDEdgeStopping, Glib::ustring::format(std::setw(2), std::fixed, std::setprecision(2), a->getValue()));
 		else if(a == scale)
@@ -132,6 +145,7 @@ void EdgePreservingDecompositionUI::setBatchMode(bool batchMode){
 	ToolPanel::setBatchMode(batchMode);
 
 	strength->showEditedCB();
+	gamma->showEditedCB();
 	edgeStopping->showEditedCB();
 	scale->showEditedCB();
 	reweightingIterates->showEditedCB();
diff --git a/rtgui/epd.h b/rtgui/epd.h
index c4bb0545e..c0559a10a 100644
--- a/rtgui/epd.h
+++ b/rtgui/epd.h
@@ -26,6 +26,7 @@
 class EdgePreservingDecompositionUI : public ToolParamBlock, public AdjusterListener, public FoldableToolPanel {
 protected:
 	Adjuster *strength;
+	Adjuster *gamma;
 	Adjuster *edgeStopping;
 	Adjuster *scale;
 	Adjuster *reweightingIterates;
diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc
index d1638a227..83fe14d82 100644
--- a/rtgui/paramsedited.cc
+++ b/rtgui/paramsedited.cc
@@ -201,6 +201,7 @@ void ParamsEdited::set (bool v) {
 	dirpyrDenoise.rgbmethod      = v;
 	epd.enabled                = v;
 	epd.strength            = v;
+	epd.gamma            = v;
 	epd.edgeStopping        = v;
 	epd.scale               = v;
 	epd.reweightingIterates = v;
@@ -600,6 +601,7 @@ void ParamsEdited::initFrom (const std::vector<rtengine::procparams::ProcParams>
 
         epd.enabled = epd.enabled && p.epd.enabled == other.epd.enabled;
         epd.strength = epd.strength && p.epd.strength == other.epd.strength;
+        epd.gamma = epd.gamma && p.epd.gamma == other.epd.gamma;
         epd.edgeStopping = epd.edgeStopping && p.epd.edgeStopping == other.epd.edgeStopping;
         epd.scale = epd.scale && p.epd.scale == other.epd.scale;
         epd.reweightingIterates = epd.reweightingIterates && p.epd.reweightingIterates == other.epd.reweightingIterates;
@@ -999,6 +1001,7 @@ void ParamsEdited::combine (rtengine::procparams::ProcParams& toEdit, const rten
 
 	if (epd.enabled)						toEdit.epd.enabled				= mods.epd.enabled;
 	if (epd.strength)						toEdit.epd.strength				= mods.epd.strength;
+	if (epd.gamma)							toEdit.epd.gamma				= mods.epd.gamma;
 	if (epd.edgeStopping)					toEdit.epd.edgeStopping			= mods.epd.edgeStopping;
 	if (epd.scale)							toEdit.epd.scale				= mods.epd.scale;
 	if (epd.reweightingIterates)			toEdit.epd.reweightingIterates	= mods.epd.reweightingIterates;
diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h
index c95d2fea9..1cad0cbbb 100644
--- a/rtgui/paramsedited.h
+++ b/rtgui/paramsedited.h
@@ -291,6 +291,7 @@ class EPDParamsEdited{
 public:
     bool enabled;
     bool strength;
+    bool gamma;
     bool edgeStopping;
     bool scale;
     bool reweightingIterates;