Cleaned gauss code and included some speedups

This commit is contained in:
heckflosse
2016-01-18 23:56:02 +01:00
parent 33ea7156b8
commit a3c20daa46
12 changed files with 2117 additions and 2177 deletions

View File

@@ -901,11 +901,7 @@ static INLINE vdouble xlog1p(vdouble a) {
typedef struct {
vfloat x, y;
} vfloat2;
#if defined( __FMA__ ) && defined( __x86_64__ )
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x,y,z); }
#else
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z) { return vaddf(vmulf(x, y), z); }
#endif
static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); }
@@ -921,6 +917,18 @@ static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vca
}
#endif
static INLINE vfloat vselfzero(vmask mask, vfloat x) {
// returns value of x if corresponding mask bits are 1, else returns 0
// faster than vself(mask, x, ZEROV)
return _mm_and_ps((vfloat)mask, x);
}
static INLINE vfloat vselfnotzero(vmask mask, vfloat x) {
// returns value of x if corresponding mask bits are 0, else returns 0
// faster than vself(mask, ZEROV, x)
return _mm_andnot_ps((vfloat)mask, x);
}
static INLINE vint2 vseli2_lt(vfloat f0, vfloat f1, vint2 x, vint2 y) {
vint2 m2 = vcast_vi2_vm(vmaskf_lt(f0, f1));
return vori2(vandi2(m2, x), vandnoti2(m2, y));
@@ -1171,7 +1179,7 @@ static INLINE vfloat xatan2f(vfloat y, vfloat x) {
r = vmulsignf(r, x);
r = vself(vorm(vmaskf_isinf(x), vmaskf_eq(x, vcast_vf_f(0.0f))), vsubf(vcast_vf_f((float)(M_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(M_PI/2)), x))), r);
r = vself(vmaskf_isinf(y), vsubf(vcast_vf_f((float)(M_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(M_PI/4)), x))), r);
r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vself(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)M_PI), vcast_vf_f(0.0f)), r);
r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vselfzero(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)M_PI)), r);
return vself(vorm(vmaskf_isnan(x), vmaskf_isnan(y)), vcast_vf_f(NANf), vmulsignf(r, y));
}
@@ -1304,7 +1312,7 @@ static INLINE vfloat xcbrtf(vfloat d) {
}
static INLINE vfloat LIMV( vfloat a, vfloat b, vfloat c ) {
return _mm_max_ps( b, _mm_min_ps(a,c));
return vmaxf( b, vminf(a,c));
}
static INLINE vfloat ULIMV( vfloat a, vfloat b, vfloat c ){
@@ -1312,13 +1320,13 @@ static INLINE vfloat ULIMV( vfloat a, vfloat b, vfloat c ){
}
static INLINE vfloat SQRV(vfloat a){
return _mm_mul_ps( a,a );
return a * a;
}
static inline void vswap( vmask condition, vfloat &a, vfloat &b) {
vfloat temp = vself(condition, a, b); // the larger of the two
condition = vnotm(condition); // invert the mask
a = vself(condition, a, b); // the smaller of the two
vfloat temp = vself(condition, a, b); // the values which fit to condition
condition = vnotm(condition); // invert the condition
a = vself(condition, a, b); // the values which fit to inverted condition
b = temp;
}