Removed most of the __x86_64__ dependencies. Also removed _RT_NESTED_OPENMP.

This commit is contained in:
heckflosse
2018-01-23 16:05:31 +01:00
parent 4046a3c9b9
commit 96bf06a950
28 changed files with 171 additions and 226 deletions

View File

@@ -828,7 +828,7 @@ class WeightedStdToneCurve : public ToneCurve
{
private:
float Triangle(float refX, float refY, float X2) const;
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
vfloat Triangle(vfloat refX, vfloat refY, vfloat X2) const;
#endif
public:
@@ -904,7 +904,7 @@ inline void StandardToneCurve::BatchApply(
// If we get to the end before getting to an aligned address, just return.
// (Or, for non-SSE mode, if we get to the end.)
return;
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
// Otherwise, we get to the first aligned address; go to the SSE part.
break;
@@ -916,7 +916,7 @@ inline void StandardToneCurve::BatchApply(
i++;
}
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
for (; i + 3 < end; i += 4) {
__m128 r_val = LVF(r[i]);
__m128 g_val = LVF(g[i]);
@@ -1007,7 +1007,7 @@ inline float WeightedStdToneCurve::Triangle(float a, float a1, float b) const
return a1;
}
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) const
{
vfloat a2 = a1 - a;
@@ -1061,7 +1061,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
// If we get to the end before getting to an aligned address, just return.
// (Or, for non-SSE mode, if we get to the end.)
return;
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
// Otherwise, we get to the first aligned address; go to the SSE part.
break;
@@ -1071,7 +1071,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
i++;
}
-#if defined( __SSE2__ ) && defined( __x86_64__ )
+#ifdef __SSE2__
const vfloat c65535v = F2V(65535.f);
const vfloat zd5v = F2V(0.5f);
const vfloat zd25v = F2V(0.25f);