RT 4.0.11.177 Segfault on start with 32-bit Linux, Issue 2118
This commit is contained in:
@@ -78,10 +78,10 @@ template<typename T>
|
|||||||
class LUT {
|
class LUT {
|
||||||
private:
|
private:
|
||||||
// list of variables ordered to improve cache speed
|
// list of variables ordered to improve cache speed
|
||||||
unsigned int maxs;
|
unsigned int maxs;
|
||||||
T * data;
|
T * data;
|
||||||
unsigned int clip, size, owner;
|
unsigned int clip, size, owner;
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
__m128 maxsv __attribute__ ((aligned (16)));
|
__m128 maxsv __attribute__ ((aligned (16)));
|
||||||
__m128 sizev __attribute__ ((aligned (16)));
|
__m128 sizev __attribute__ ((aligned (16)));
|
||||||
__m128i maxsiv __attribute__ ((aligned (16)));
|
__m128i maxsiv __attribute__ ((aligned (16)));
|
||||||
@@ -104,7 +104,7 @@ public:
|
|||||||
owner = 1;
|
owner = 1;
|
||||||
size = s;
|
size = s;
|
||||||
maxs=size-2;
|
maxs=size-2;
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
maxsv = _mm_set1_ps( maxs );
|
maxsv = _mm_set1_ps( maxs );
|
||||||
maxsiv = _mm_cvttps_epi32( maxsv );
|
maxsiv = _mm_cvttps_epi32( maxsv );
|
||||||
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
||||||
@@ -125,7 +125,7 @@ public:
|
|||||||
owner = 1;
|
owner = 1;
|
||||||
size = s;
|
size = s;
|
||||||
maxs=size-2;
|
maxs=size-2;
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
maxsv = _mm_set1_ps( maxs );
|
maxsv = _mm_set1_ps( maxs );
|
||||||
maxsiv = _mm_cvttps_epi32( maxsv );
|
maxsiv = _mm_cvttps_epi32( maxsv );
|
||||||
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
||||||
@@ -148,7 +148,7 @@ public:
|
|||||||
owner = 1;
|
owner = 1;
|
||||||
size = s;
|
size = s;
|
||||||
maxs=size-2;
|
maxs=size-2;
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
maxsv = _mm_set1_ps( size - 2);
|
maxsv = _mm_set1_ps( size - 2);
|
||||||
maxsiv = _mm_cvttps_epi32( maxsv );
|
maxsiv = _mm_cvttps_epi32( maxsv );
|
||||||
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
sizeiv = _mm_set1_epi32( (int)(size-1) );
|
||||||
@@ -190,7 +190,7 @@ public:
|
|||||||
memcpy(this->data,rhs.data,rhs.size*sizeof(T));
|
memcpy(this->data,rhs.data,rhs.size*sizeof(T));
|
||||||
this->size=rhs.size;
|
this->size=rhs.size;
|
||||||
this->maxs=this->size-2;
|
this->maxs=this->size-2;
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
this->maxsv = _mm_set1_ps( this->size - 2);
|
this->maxsv = _mm_set1_ps( this->size - 2);
|
||||||
this->maxsiv = _mm_cvttps_epi32( this->maxsv );
|
this->maxsiv = _mm_cvttps_epi32( this->maxsv );
|
||||||
this->sizeiv = _mm_set1_epi32( (int)(this->size-1) );
|
this->sizeiv = _mm_set1_epi32( (int)(this->size-1) );
|
||||||
@@ -210,14 +210,14 @@ public:
|
|||||||
else
|
else
|
||||||
return data[size - 1];
|
return data[size - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
__m128 operator[](__m128 indexv ) const {
|
__m128 operator[](__m128 indexv ) const {
|
||||||
printf("don't use this operator. It's not ready for production");
|
printf("don't use this operator. It's not ready for production");
|
||||||
return _mm_setzero_ps();
|
return _mm_setzero_ps();
|
||||||
|
|
||||||
// convert floats to ints
|
// convert floats to ints
|
||||||
__m128i idxv = _mm_cvttps_epi32( indexv );
|
__m128i idxv = _mm_cvttps_epi32( indexv );
|
||||||
__m128 tempv, resultv, p1v, p2v;
|
__m128 tempv, resultv, p1v, p2v;
|
||||||
@@ -258,7 +258,7 @@ public:
|
|||||||
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
|
tempv = _mm_shuffle_ps(tempv, tempv, _MM_SHUFFLE(1,1,1,1));
|
||||||
p2v = _mm_move_ss( p2v, tempv);
|
p2v = _mm_move_ss( p2v, tempv);
|
||||||
// now p1v is 3 2 3 1
|
// now p1v is 3 2 3 1
|
||||||
|
|
||||||
// get 1st value
|
// get 1st value
|
||||||
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(0,0,0,0)));
|
idx = _mm_cvtsi128_si32 (_mm_shuffle_epi32(idxv,_MM_SHUFFLE(0,0,0,0)));
|
||||||
tempv = LVFU(data[idx]);
|
tempv = LVFU(data[idx]);
|
||||||
@@ -307,7 +307,7 @@ public:
|
|||||||
// now p1v is 3 2 3 2
|
// now p1v is 3 2 3 2
|
||||||
p1v = _mm_move_ss( p1v, tempv );
|
p1v = _mm_move_ss( p1v, tempv );
|
||||||
// now p1v is 3 2 3 1
|
// now p1v is 3 2 3 1
|
||||||
|
|
||||||
// get 1st value
|
// get 1st value
|
||||||
idx = _mm_cvtsi128_si32 (idxv);
|
idx = _mm_cvtsi128_si32 (idxv);
|
||||||
tempv = _mm_load_ss(&data[idx]);
|
tempv = _mm_load_ss(&data[idx]);
|
||||||
@@ -342,8 +342,8 @@ public:
|
|||||||
T p2 = data[idx + 1]-p1;
|
T p2 = data[idx + 1]-p1;
|
||||||
return (p1 + p2*diff);
|
return (p1 + p2*diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
// Debug facility ; dump the content of the LUT in a file. No control of the filename is done
|
// Debug facility ; dump the content of the LUT in a file. No control of the filename is done
|
||||||
void dump(Glib::ustring fname) {
|
void dump(Glib::ustring fname) {
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
* (at your option) any later version.
|
* (at your option) any later version.
|
||||||
*
|
*
|
||||||
* RawTherapee is distributed in the hope that it will be useful,
|
* RawTherapee is distributed in the hope that it will be useful,
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
@@ -71,18 +71,18 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int
|
|||||||
else {
|
else {
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
//experimental dirpyr shmap
|
//experimental dirpyr shmap
|
||||||
|
|
||||||
float thresh = 100*radius;//1000;
|
float thresh = 100*radius;//1000;
|
||||||
LUTf rangefn(0x10000);
|
LUTf rangefn(0x10000);
|
||||||
float ** dirpyrlo[2];
|
float ** dirpyrlo[2];
|
||||||
|
|
||||||
int intfactor = 1024;//16384;
|
int intfactor = 1024;//16384;
|
||||||
|
|
||||||
//set up range functions
|
//set up range functions
|
||||||
for (int i=0; i<0x10000; i++) {
|
for (int i=0; i<0x10000; i++) {
|
||||||
//rangefn[i] = (int)(((thresh)/((double)(i) + (thresh)))*intfactor);
|
//rangefn[i] = (int)(((thresh)/((double)(i) + (thresh)))*intfactor);
|
||||||
rangefn[i] = static_cast<int>(xexpf(-(min(10.0f,(static_cast<float>(i)*i) / (thresh*thresh))))*intfactor);
|
rangefn[i] = static_cast<int>(xexpf(-(min(10.0f,(static_cast<float>(i)*i) / (thresh*thresh))))*intfactor);
|
||||||
//if (rangefn[i]<0 || rangefn[i]>intfactor)
|
//if (rangefn[i]<0 || rangefn[i]>intfactor)
|
||||||
//printf("i=%d rangefn=%d arg=%f \n",i,rangefn[i], float(i*i) / (thresh*thresh));
|
//printf("i=%d rangefn=%d arg=%f \n",i,rangefn[i], float(i*i) / (thresh*thresh));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,11 +104,11 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
dirpyr_shmap(dirpyrlo[1-indx], map, W, H, rangefn, level, scale );
|
dirpyr_shmap(dirpyrlo[1-indx], map, W, H, rangefn, level, scale );
|
||||||
|
|
||||||
|
|
||||||
freeArray<float>(dirpyrlo[0], H);
|
freeArray<float>(dirpyrlo[0], H);
|
||||||
freeArray<float>(dirpyrlo[1], H);
|
freeArray<float>(dirpyrlo[1], H);
|
||||||
|
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
/*
|
/*
|
||||||
@@ -122,8 +122,8 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int
|
|||||||
map[i][j] = (buffer[i-1][j-1]+buffer[i-1][j]+buffer[i-1][j+1]+buffer[i][j-1]+buffer[i][j]+buffer[i][j+1]+buffer[i+1][j-1]+buffer[i+1][j]+buffer[i+1][j+1])/9;
|
map[i][j] = (buffer[i-1][j-1]+buffer[i-1][j]+buffer[i-1][j+1]+buffer[i][j-1]+buffer[i][j]+buffer[i][j+1]+buffer[i+1][j-1]+buffer[i+1][j]+buffer[i+1][j+1])/9;
|
||||||
else
|
else
|
||||||
map[i][j] = buffer[i][j];
|
map[i][j] = buffer[i][j];
|
||||||
*/
|
*/
|
||||||
|
|
||||||
}
|
}
|
||||||
// update average, minimum, maximum
|
// update average, minimum, maximum
|
||||||
|
|
||||||
@@ -178,21 +178,21 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
//scale is spacing of directional averaging weights
|
//scale is spacing of directional averaging weights
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// calculate weights, compute directionally weighted average
|
// calculate weights, compute directionally weighted average
|
||||||
|
|
||||||
int scalewin, halfwin;
|
int scalewin, halfwin;
|
||||||
|
|
||||||
if(level < 2) {
|
if(level < 2) {
|
||||||
halfwin = 1;
|
halfwin = 1;
|
||||||
scalewin = halfwin*scale;
|
scalewin = halfwin*scale;
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
__m128 dirwtv, valv, normv;
|
__m128 dirwtv, valv, normv;
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
int j;
|
int j;
|
||||||
@@ -205,7 +205,7 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j%scale; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j%scale; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -215,12 +215,12 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
}
|
}
|
||||||
data_coarse[i][j] = val/norm; // low pass filter
|
data_coarse[i][j] = val/norm; // low pass filter
|
||||||
}
|
}
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
for(; j < (width-scalewin)-3; j+=4)
|
for(; j < (width-scalewin)-3; j+=4)
|
||||||
{
|
{
|
||||||
valv= _mm_setzero_ps();
|
valv= _mm_setzero_ps();
|
||||||
normv= _mm_setzero_ps();
|
normv= _mm_setzero_ps();
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwtv = ( rangefn[_mm_cvttps_epi32(vabsf(LVFU(data_fine[inbr][jnbr])-LVFU(data_fine[i][j])))] );
|
dirwtv = ( rangefn[_mm_cvttps_epi32(vabsf(LVFU(data_fine[inbr][jnbr])-LVFU(data_fine[i][j])))] );
|
||||||
@@ -234,7 +234,7 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -250,7 +250,7 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -265,7 +265,7 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<width; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<width; jnbr+=scale) {
|
||||||
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -279,16 +279,16 @@ void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
halfwin=2;
|
halfwin=2;
|
||||||
scalewin = halfwin*scale;
|
scalewin = halfwin*scale;
|
||||||
int domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,2,2,1},{1,2,2,2,1},{1,1,1,1,1}};
|
int domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,2,2,1},{1,2,2,2,1},{1,1,1,1,1}};
|
||||||
//generate domain kernel
|
//generate domain kernel
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
__m128 dirwtv, valv, normv;
|
__m128 dirwtv, valv, normv;
|
||||||
float domkerv[5][5][4] __attribute__ ((aligned (16))) = {{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}}};
|
float domkerv[5][5][4] __attribute__ ((aligned (16))) = {{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{2,2,2,2},{2,2,2,2},{2,2,2,2},{1,1,1,1}},{{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1},{1,1,1,1}}};
|
||||||
|
|
||||||
@@ -303,7 +303,7 @@ else {
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j%scale; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j%scale; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -313,12 +313,12 @@ else {
|
|||||||
}
|
}
|
||||||
data_coarse[i][j] = val/norm; // low pass filter
|
data_coarse[i][j] = val/norm; // low pass filter
|
||||||
}
|
}
|
||||||
#if defined( __SSE2__ ) && ((defined( WIN32 ) && defined( __x86_64__ )) || !defined( WIN32 ))
|
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
||||||
for(; j < width-scalewin-3; j+=4)
|
for(; j < width-scalewin-3; j+=4)
|
||||||
{
|
{
|
||||||
valv = _mm_setzero_ps();
|
valv = _mm_setzero_ps();
|
||||||
normv = _mm_setzero_ps();
|
normv = _mm_setzero_ps();
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwtv = ( _mm_load_ps((float*)&domkerv[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin]) * rangefn[_mm_cvttps_epi32(vabsf(LVFU(data_fine[inbr][jnbr])-LVFU(data_fine[i][j])))] );
|
dirwtv = ( _mm_load_ps((float*)&domkerv[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin]) * rangefn[_mm_cvttps_epi32(vabsf(LVFU(data_fine[inbr][jnbr])-LVFU(data_fine[i][j])))] );
|
||||||
@@ -332,7 +332,7 @@ else {
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -348,7 +348,7 @@ else {
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<=j+scalewin; jnbr+=scale) {
|
||||||
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -363,7 +363,7 @@ else {
|
|||||||
{
|
{
|
||||||
float val=0;
|
float val=0;
|
||||||
float norm=0;
|
float norm=0;
|
||||||
|
|
||||||
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
for(int inbr=max(i-scalewin,i%scale); inbr<=min(i+scalewin, height-1); inbr+=scale) {
|
||||||
for (int jnbr=j-scalewin; jnbr<width; jnbr+=scale) {
|
for (int jnbr=j-scalewin; jnbr<width; jnbr+=scale) {
|
||||||
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
dirwt = ( domker[(inbr-i)/scale+halfwin][(jnbr-j)/scale+halfwin] * rangefn[abs(data_fine[inbr][jnbr]-data_fine[i][j])] );
|
||||||
@@ -375,9 +375,9 @@ else {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}//end of SHMap
|
}//end of SHMap
|
||||||
|
Reference in New Issue
Block a user