diff --git a/rtdata/themes/RawTherapee-GTK3-20_.css b/rtdata/themes/RawTherapee-GTK3-20_.css index 3f5072f48..a5f52a769 100644 --- a/rtdata/themes/RawTherapee-GTK3-20_.css +++ b/rtdata/themes/RawTherapee-GTK3-20_.css @@ -640,11 +640,11 @@ spinbutton entry { } spinbutton button { margin: 0; - padding: 0; - border-radius: 0; + padding: 0; + border-radius: 0; } spinbutton button.up { - border-radius: 0 0.16666666666666666666em 0.16666666666666666666em 0; + border-radius: 0 0.16666666666666666666em 0.16666666666666666666em 0; } entry:disabled, spinbutton:disabled { @@ -841,7 +841,7 @@ flowboxchild:selected { color: #CCCCCC; padding: 0; margin: 0 0.25em 0 0.25em; - font-size: 1.1em; + font-size: 1.1em; } #MyExpanderTitle:hover { background-color: #202020; @@ -955,7 +955,7 @@ fontchooser scrolledwindow, } #PlacesPaned { - margin: 0; + margin: 0; padding: 0 0.4166666666666666em 0 0; } #PlacesPaned > box:nth-child(1) scrolledwindow + grid { @@ -1050,7 +1050,7 @@ dialog frame > label:not(.dummy) { #LabelRightNotebook { padding: 0.4166666666666666em; margin: 0.1666666666666666em; - font-size: 1.25em; + font-size: 1.25em; } #ToolPanelNotebook { diff --git a/rtdata/themes/RawTherapee-GTK3-_19.css b/rtdata/themes/RawTherapee-GTK3-_19.css index 58742e8ca..f4bec23b1 100644 --- a/rtdata/themes/RawTherapee-GTK3-_19.css +++ b/rtdata/themes/RawTherapee-GTK3-_19.css @@ -46,7 +46,7 @@ GtkBox { border-style: none; border-radius: 0; margin: 0; - padding: 0; + padding: 0; } GtkGrid { diff --git a/rtengine/CMakeLists.txt b/rtengine/CMakeLists.txt index 8ee905e2c..40ddaefbb 100644 --- a/rtengine/CMakeLists.txt +++ b/rtengine/CMakeLists.txt @@ -98,16 +98,6 @@ set(RTENGINESOURCEFILES ipwavelet.cc jdatasrc.cc jpeg_ijg/jpeg_memsrc.cc - klt/convolve.cc - klt/error.cc - klt/klt_util.cc - klt/klt.cc - klt/pnmio.cc - klt/pyramid.cc - klt/selectGoodFeatures.cc - klt/storeFeatures.cc - klt/trackFeatures.cc - klt/writeFeatures.cc labimage.cc lcp.cc lj92.c diff --git a/rtengine/ciecam02.cc b/rtengine/ciecam02.cc index dfef273d6..dc71fee85 100644 --- a/rtengine/ciecam02.cc +++ b/rtengine/ciecam02.cc @@ -145,8 +145,8 @@ void Ciecam02::curveJfloat (float br, float contr, const LUTu & histogram, LUTf } outCurve *= 32767.f; - //printf("out500=%f out15000=%f\n", outCurve[500], outCurve[15000]); - //outCurve.dump("brig"); + //printf("out500=%f out15000=%f\n", outCurve[500], outCurve[15000]); + //outCurve.dump("brig"); } /** diff --git a/rtengine/color.h b/rtengine/color.h index 97835ba10..2acf675d4 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -1070,12 +1070,11 @@ public: */ static inline double gamma2 (double x) // g3 1+g4 { - // return x <= 0.003041 ? x * 12.92310 : 1.055 * exp(log(x) / 2.39990) - 0.055;//calculate with calcgamma + // return x <= 0.003041 ? x * 12.92310 : 1.055 * exp(log(x) / 2.39990) - 0.055;//calculate with calcgamma //return x <= 0.0031308 ? x * 12.92310 : 1.055 * exp(log(x) / sRGBGammaCurve) - 0.055;//standard discontinuous - //very small differences between the 2 + //very small differences between the 2 return x <= 0.003040 ? x * 12.92310 : 1.055 * exp(log(x) / sRGBGammaCurve) - 0.055;//continuous - // return x <= 0.003041 ? x * 12.92310 : 1.055011 * exp(log(x) / sRGBGammaCurve) - 0.055011;//continuous - + // return x <= 0.003041 ? x * 12.92310 : 1.055011 * exp(log(x) / sRGBGammaCurve) - 0.055011;//continuous } @@ -1087,12 +1086,11 @@ public: */ static inline double igamma2 (double x) //g2 { - // return x <= 0.039289 ? x / 12.92310 : exp(log((x + 0.055) / 1.055) * 2.39990);//calculate with calcgamma - // return x <= 0.04045 ? x / 12.92310 : exp(log((x + 0.055) / 1.055) * sRGBGammaCurve);//standard discontinuous - //very small differences between the 4 + // return x <= 0.039289 ? x / 12.92310 : exp(log((x + 0.055) / 1.055) * 2.39990);//calculate with calcgamma + // return x <= 0.04045 ? x / 12.92310 : exp(log((x + 0.055) / 1.055) * sRGBGammaCurve);//standard discontinuous + //very small differences between the 4 return x <= 0.039286 ? x / 12.92310 : exp(log((x + 0.055) / 1.055) * sRGBGammaCurve);//continuous - // return x <= 0.039293 ? x / 12.92310 : exp(log((x + 0.055011) / 1.055011) * sRGBGammaCurve);//continuous - + // return x <= 0.039293 ? x / 12.92310 : exp(log((x + 0.055011) / 1.055011) * sRGBGammaCurve);//continuous } diff --git a/rtengine/sleef.c b/rtengine/sleef.c index cc92be108..a01aef5b9 100644 --- a/rtengine/sleef.c +++ b/rtengine/sleef.c @@ -26,29 +26,29 @@ #define pow_F(a,b) (xexpf(b*xlogf(a))) __inline int64_t doubleToRawLongBits(double d) { - union { - double f; - int64_t i; - } tmp; - tmp.f = d; - return tmp.i; + union { + double f; + int64_t i; + } tmp; + tmp.f = d; + return tmp.i; } __inline double longBitsToDouble(int64_t i) { - union { - double f; - int64_t i; - } tmp; - tmp.i = i; - return tmp.f; + union { + double f; + int64_t i; + } tmp; + tmp.i = i; + return tmp.f; } __inline double xfabs(double x) { - return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x)); + return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x)); } __inline double mulsign(double x, double y) { - return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (1LL << 63))); + return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (1LL << 63))); } __inline double sign(double d) { return mulsign(1, d); } @@ -61,827 +61,827 @@ __inline int xisminf(double x) { return x == -rtengine::RT_INFINITY; } __inline int xispinf(double x) { return x == rtengine::RT_INFINITY; } __inline double ldexpk(double x, int q) { - double u; - int m; - m = q >> 31; - m = (((m + q) >> 9) - m) << 7; - q = q - (m << 2); - u = longBitsToDouble(((int64_t)(m + 0x3ff)) << 52); - double u2 = u*u; - u2 = u2 * u2; - x = x * u2; - u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); - return x * u; + double u; + int m; + m = q >> 31; + m = (((m + q) >> 9) - m) << 7; + q = q - (m << 2); + u = longBitsToDouble(((int64_t)(m + 0x3ff)) << 52); + double u2 = u*u; + u2 = u2 * u2; + x = x * u2; + u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); + return x * u; } __inline double xldexp(double x, int q) { return ldexpk(x, q); } __inline int ilogbp1(double d) { - int m = d < 4.9090934652977266E-91; - d = m ? 2.037035976334486E90 * d : d; - int q = (doubleToRawLongBits(d) >> 52) & 0x7ff; - q = m ? q - (300 + 0x03fe) : q - 0x03fe; - return q; + int m = d < 4.9090934652977266E-91; + d = m ? 2.037035976334486E90 * d : d; + int q = (doubleToRawLongBits(d) >> 52) & 0x7ff; + q = m ? q - (300 + 0x03fe) : q - 0x03fe; + return q; } __inline int xilogb(double d) { - int e = ilogbp1(xfabs(d)) - 1; - e = d == 0 ? (-2147483647 - 1) : e; - e = d == rtengine::RT_INFINITY || d == -rtengine::RT_INFINITY ? 2147483647 : e; - return e; + int e = ilogbp1(xfabs(d)) - 1; + e = d == 0 ? (-2147483647 - 1) : e; + e = d == rtengine::RT_INFINITY || d == -rtengine::RT_INFINITY ? 2147483647 : e; + return e; } __inline double upper(double d) { - return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL); + return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL); } typedef struct { - double x, y; + double x, y; } double2; typedef struct { - float x, y; + float x, y; } float2; __inline double2 dd(double h, double l) { - double2 ret; - ret.x = h; ret.y = l; - return ret; + double2 ret; + ret.x = h; ret.y = l; + return ret; } __inline double2 normalize_d(double2 t) { - double2 s; + double2 s; - s.x = t.x + t.y; - s.y = t.x - s.x + t.y; + s.x = t.x + t.y; + s.y = t.x - s.x + t.y; - return s; + return s; } __inline double2 scale_d(double2 d, double s) { - double2 r; + double2 r; - r.x = d.x * s; - r.y = d.y * s; + r.x = d.x * s; + r.y = d.y * s; - return r; + return r; } __inline double2 add2_ss(double x, double y) { - double2 r; + double2 r; - r.x = x + y; - double v = r.x - x; - r.y = (x - (r.x - v)) + (y - v); + r.x = x + y; + double v = r.x - x; + r.y = (x - (r.x - v)) + (y - v); - return r; + return r; } __inline double2 add_ds(double2 x, double y) { - // |x| >= |y| + // |x| >= |y| - double2 r; + double2 r; - assert(xisnan(x.x) || xisnan(y) || xfabs(x.x) >= xfabs(y)); + assert(xisnan(x.x) || xisnan(y) || xfabs(x.x) >= xfabs(y)); - r.x = x.x + y; - r.y = x.x - r.x + y + x.y; + r.x = x.x + y; + r.y = x.x - r.x + y + x.y; - return r; + return r; } __inline double2 add2_ds(double2 x, double y) { - // |x| >= |y| + // |x| >= |y| - double2 r; + double2 r; - r.x = x.x + y; - double v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y - v); - r.y += x.y; + r.x = x.x + y; + double v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y - v); + r.y += x.y; - return r; + return r; } __inline double2 add_sd(double x, double2 y) { - // |x| >= |y| + // |x| >= |y| - double2 r; + double2 r; - assert(xisnan(x) || xisnan(y.x) || xfabs(x) >= xfabs(y.x)); + assert(xisnan(x) || xisnan(y.x) || xfabs(x) >= xfabs(y.x)); - r.x = x + y.x; - r.y = x - r.x + y.x + y.y; + r.x = x + y.x; + r.y = x - r.x + y.x + y.y; - return r; + return r; } __inline double2 add_dd(double2 x, double2 y) { - // |x| >= |y| + // |x| >= |y| - double2 r; + double2 r; - assert(xisnan(x.x) || xisnan(y.x) || xfabs(x.x) >= xfabs(y.x)); + assert(xisnan(x.x) || xisnan(y.x) || xfabs(x.x) >= xfabs(y.x)); - r.x = x.x + y.x; - r.y = x.x - r.x + y.x + x.y + y.y; + r.x = x.x + y.x; + r.y = x.x - r.x + y.x + x.y + y.y; - return r; + return r; } __inline double2 add2_dd(double2 x, double2 y) { - double2 r; + double2 r; - r.x = x.x + y.x; - double v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y.x - v); - r.y += x.y + y.y; + r.x = x.x + y.x; + double v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y.x - v); + r.y += x.y + y.y; - return r; + return r; } __inline double2 div_dd(double2 n, double2 d) { - double t = 1.0 / d.x; - double dh = upper(d.x), dl = d.x - dh; - double th = upper(t ), tl = t - th; - double nhh = upper(n.x), nhl = n.x - nhh; + double t = 1.0 / d.x; + double dh = upper(d.x), dl = d.x - dh; + double th = upper(t ), tl = t - th; + double nhh = upper(n.x), nhl = n.x - nhh; - double2 q; + double2 q; - q.x = n.x * t; + q.x = n.x * t; - double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + - q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); + double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); - q.y = t * (n.y - q.x * d.y) + u; + q.y = t * (n.y - q.x * d.y) + u; - return q; + return q; } __inline double2 mul_ss(double x, double y) { - double xh = upper(x), xl = x - xh; - double yh = upper(y), yl = y - yh; - double2 r; + double xh = upper(x), xl = x - xh; + double yh = upper(y), yl = y - yh; + double2 r; - r.x = x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; + r.x = x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; - return r; + return r; } __inline double2 mul_ds(double2 x, double y) { - double xh = upper(x.x), xl = x.x - xh; - double yh = upper(y ), yl = y - yh; - double2 r; + double xh = upper(x.x), xl = x.x - xh; + double yh = upper(y ), yl = y - yh; + double2 r; - r.x = x.x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; + r.x = x.x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; - return r; + return r; } __inline double2 mul_dd(double2 x, double2 y) { - double xh = upper(x.x), xl = x.x - xh; - double yh = upper(y.x), yl = y.x - yh; - double2 r; + double xh = upper(x.x), xl = x.x - xh; + double yh = upper(y.x), yl = y.x - yh; + double2 r; - r.x = x.x * y.x; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; + r.x = x.x * y.x; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; - return r; + return r; } __inline double2 squ_d(double2 x) { - double xh = upper(x.x), xl = x.x - xh; - double2 r; + double xh = upper(x.x), xl = x.x - xh; + double2 r; - r.x = x.x * x.x; - r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); + r.x = x.x * x.x; + r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); - return r; + return r; } __inline double2 rec_s(double d) { - double t = 1.0 / d; - double dh = upper(d), dl = d - dh; - double th = upper(t), tl = t - th; - double2 q; + double t = 1.0 / d; + double dh = upper(d), dl = d - dh; + double th = upper(t), tl = t - th; + double2 q; - q.x = t; - q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); + q.x = t; + q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); - return q; + return q; } __inline double2 sqrt_d(double2 d) { - double t = sqrt(d.x + d.y); - return scale_d(mul_dd(add2_dd(d, mul_ss(t, t)), rec_s(t)), 0.5); + double t = sqrt(d.x + d.y); + return scale_d(mul_dd(add2_dd(d, mul_ss(t, t)), rec_s(t)), 0.5); } __inline double atan2k(double y, double x) { - double s, t, u; - int q = 0; + double s, t, u; + int q = 0; - if (x < 0) { x = -x; q = -2; } - if (y > x) { t = x; x = y; y = -t; q += 1; } + if (x < 0) { x = -x; q = -2; } + if (y > x) { t = x; x = y; y = -t; q += 1; } - s = y / x; - t = s * s; + s = y / x; + t = s * s; - u = -1.88796008463073496563746e-05; - u = u * t + (0.000209850076645816976906797); - u = u * t + (-0.00110611831486672482563471); - u = u * t + (0.00370026744188713119232403); - u = u * t + (-0.00889896195887655491740809); - u = u * t + (0.016599329773529201970117); - u = u * t + (-0.0254517624932312641616861); - u = u * t + (0.0337852580001353069993897); - u = u * t + (-0.0407629191276836500001934); - u = u * t + (0.0466667150077840625632675); - u = u * t + (-0.0523674852303482457616113); - u = u * t + (0.0587666392926673580854313); - u = u * t + (-0.0666573579361080525984562); - u = u * t + (0.0769219538311769618355029); - u = u * t + (-0.090908995008245008229153); - u = u * t + (0.111111105648261418443745); - u = u * t + (-0.14285714266771329383765); - u = u * t + (0.199999999996591265594148); - u = u * t + (-0.333333333333311110369124); + u = -1.88796008463073496563746e-05; + u = u * t + (0.000209850076645816976906797); + u = u * t + (-0.00110611831486672482563471); + u = u * t + (0.00370026744188713119232403); + u = u * t + (-0.00889896195887655491740809); + u = u * t + (0.016599329773529201970117); + u = u * t + (-0.0254517624932312641616861); + u = u * t + (0.0337852580001353069993897); + u = u * t + (-0.0407629191276836500001934); + u = u * t + (0.0466667150077840625632675); + u = u * t + (-0.0523674852303482457616113); + u = u * t + (0.0587666392926673580854313); + u = u * t + (-0.0666573579361080525984562); + u = u * t + (0.0769219538311769618355029); + u = u * t + (-0.090908995008245008229153); + u = u * t + (0.111111105648261418443745); + u = u * t + (-0.14285714266771329383765); + u = u * t + (0.199999999996591265594148); + u = u * t + (-0.333333333333311110369124); - t = u * t * s + s; - t = q * (rtengine::RT_PI_2) + t; + t = u * t * s + s; + t = q * (rtengine::RT_PI_2) + t; - return t; + return t; } __inline double xatan2(double y, double x) { - double r = atan2k(xfabs(y), x); + double r = atan2k(xfabs(y), x); - r = mulsign(r, x); - if (xisinf(x) || x == 0) r = rtengine::RT_PI_2 - (xisinf(x) ? (sign(x) * (rtengine::RT_PI_2)) : 0); - if (xisinf(y) ) r = rtengine::RT_PI_2 - (xisinf(x) ? (sign(x) * (rtengine::RT_PI*1/4)) : 0); - if ( y == 0) r = (sign(x) == -1 ? rtengine::RT_PI : 0); + r = mulsign(r, x); + if (xisinf(x) || x == 0) r = rtengine::RT_PI_2 - (xisinf(x) ? (sign(x) * (rtengine::RT_PI_2)) : 0); + if (xisinf(y) ) r = rtengine::RT_PI_2 - (xisinf(x) ? (sign(x) * (rtengine::RT_PI*1/4)) : 0); + if ( y == 0) r = (sign(x) == -1 ? rtengine::RT_PI : 0); - return xisnan(x) || xisnan(y) ? rtengine::RT_NAN : mulsign(r, y); + return xisnan(x) || xisnan(y) ? rtengine::RT_NAN : mulsign(r, y); } __inline double xasin(double d) { - return mulsign(atan2k(xfabs(d), sqrt((1+d)*(1-d))), d); + return mulsign(atan2k(xfabs(d), sqrt((1+d)*(1-d))), d); } __inline double xacos(double d) { - return mulsign(atan2k(sqrt((1+d)*(1-d)), xfabs(d)), d) + (d < 0 ? rtengine::RT_PI : 0); + return mulsign(atan2k(sqrt((1+d)*(1-d)), xfabs(d)), d) + (d < 0 ? rtengine::RT_PI : 0); } __inline double xatan(double s) { - double t, u; - int q = 0; + double t, u; + int q = 0; - if (s < 0) { s = -s; q = 2; } - if (s > 1) { s = 1.0 / s; q |= 1; } + if (s < 0) { s = -s; q = 2; } + if (s > 1) { s = 1.0 / s; q |= 1; } - t = s * s; + t = s * s; - u = -1.88796008463073496563746e-05; - u = u * t + (0.000209850076645816976906797); - u = u * t + (-0.00110611831486672482563471); - u = u * t + (0.00370026744188713119232403); - u = u * t + (-0.00889896195887655491740809); - u = u * t + (0.016599329773529201970117); - u = u * t + (-0.0254517624932312641616861); - u = u * t + (0.0337852580001353069993897); - u = u * t + (-0.0407629191276836500001934); - u = u * t + (0.0466667150077840625632675); - u = u * t + (-0.0523674852303482457616113); - u = u * t + (0.0587666392926673580854313); - u = u * t + (-0.0666573579361080525984562); - u = u * t + (0.0769219538311769618355029); - u = u * t + (-0.090908995008245008229153); - u = u * t + (0.111111105648261418443745); - u = u * t + (-0.14285714266771329383765); - u = u * t + (0.199999999996591265594148); - u = u * t + (-0.333333333333311110369124); + u = -1.88796008463073496563746e-05; + u = u * t + (0.000209850076645816976906797); + u = u * t + (-0.00110611831486672482563471); + u = u * t + (0.00370026744188713119232403); + u = u * t + (-0.00889896195887655491740809); + u = u * t + (0.016599329773529201970117); + u = u * t + (-0.0254517624932312641616861); + u = u * t + (0.0337852580001353069993897); + u = u * t + (-0.0407629191276836500001934); + u = u * t + (0.0466667150077840625632675); + u = u * t + (-0.0523674852303482457616113); + u = u * t + (0.0587666392926673580854313); + u = u * t + (-0.0666573579361080525984562); + u = u * t + (0.0769219538311769618355029); + u = u * t + (-0.090908995008245008229153); + u = u * t + (0.111111105648261418443745); + u = u * t + (-0.14285714266771329383765); + u = u * t + (0.199999999996591265594148); + u = u * t + (-0.333333333333311110369124); - t = s + s * (t * u); + t = s + s * (t * u); - if ((q & 1) != 0) t = 1.570796326794896557998982 - t; - if ((q & 2) != 0) t = -t; + if ((q & 1) != 0) t = 1.570796326794896557998982 - t; + if ((q & 2) != 0) t = -t; - return t; + return t; } __inline double xsin(double d) { - int q; - double u, s; + int q; + double u, s; - q = (int)xrint(d * rtengine::RT_1_PI); + q = (int)xrint(d * rtengine::RT_1_PI); - d = mla(q, -PI4_A*4, d); - d = mla(q, -PI4_B*4, d); - d = mla(q, -PI4_C*4, d); + d = mla(q, -PI4_A*4, d); + d = mla(q, -PI4_B*4, d); + d = mla(q, -PI4_C*4, d); - s = d * d; + s = d * d; - if ((q & 1) != 0) d = -d; + if ((q & 1) != 0) d = -d; - u = -7.97255955009037868891952e-18; - u = mla(u, s, 2.81009972710863200091251e-15); - u = mla(u, s, -7.64712219118158833288484e-13); - u = mla(u, s, 1.60590430605664501629054e-10); - u = mla(u, s, -2.50521083763502045810755e-08); - u = mla(u, s, 2.75573192239198747630416e-06); - u = mla(u, s, -0.000198412698412696162806809); - u = mla(u, s, 0.00833333333333332974823815); - u = mla(u, s, -0.166666666666666657414808); + u = -7.97255955009037868891952e-18; + u = mla(u, s, 2.81009972710863200091251e-15); + u = mla(u, s, -7.64712219118158833288484e-13); + u = mla(u, s, 1.60590430605664501629054e-10); + u = mla(u, s, -2.50521083763502045810755e-08); + u = mla(u, s, 2.75573192239198747630416e-06); + u = mla(u, s, -0.000198412698412696162806809); + u = mla(u, s, 0.00833333333333332974823815); + u = mla(u, s, -0.166666666666666657414808); - u = mla(s, u * d, d); + u = mla(s, u * d, d); - return u; + return u; } __inline double xcos(double d) { - int q; - double u, s; + int q; + double u, s; - q = 1 + 2*(int)xrint(d * rtengine::RT_1_PI - 0.5); + q = 1 + 2*(int)xrint(d * rtengine::RT_1_PI - 0.5); - d = mla(q, -PI4_A*2, d); - d = mla(q, -PI4_B*2, d); - d = mla(q, -PI4_C*2, d); + d = mla(q, -PI4_A*2, d); + d = mla(q, -PI4_B*2, d); + d = mla(q, -PI4_C*2, d); - s = d * d; + s = d * d; - if ((q & 2) == 0) d = -d; + if ((q & 2) == 0) d = -d; - u = -7.97255955009037868891952e-18; - u = mla(u, s, 2.81009972710863200091251e-15); - u = mla(u, s, -7.64712219118158833288484e-13); - u = mla(u, s, 1.60590430605664501629054e-10); - u = mla(u, s, -2.50521083763502045810755e-08); - u = mla(u, s, 2.75573192239198747630416e-06); - u = mla(u, s, -0.000198412698412696162806809); - u = mla(u, s, 0.00833333333333332974823815); - u = mla(u, s, -0.166666666666666657414808); + u = -7.97255955009037868891952e-18; + u = mla(u, s, 2.81009972710863200091251e-15); + u = mla(u, s, -7.64712219118158833288484e-13); + u = mla(u, s, 1.60590430605664501629054e-10); + u = mla(u, s, -2.50521083763502045810755e-08); + u = mla(u, s, 2.75573192239198747630416e-06); + u = mla(u, s, -0.000198412698412696162806809); + u = mla(u, s, 0.00833333333333332974823815); + u = mla(u, s, -0.166666666666666657414808); - u = mla(s, u * d, d); + u = mla(s, u * d, d); - return u; + return u; } __inline double2 xsincos(double d) { - int q; - double u, s, t; - double2 r; + int q; + double u, s, t; + double2 r; - q = (int)xrint(d * (2 * rtengine::RT_1_PI)); + q = (int)xrint(d * (2 * rtengine::RT_1_PI)); - s = d; + s = d; - s = mla(-q, PI4_A*2, s); - s = mla(-q, PI4_B*2, s); - s = mla(-q, PI4_C*2, s); + s = mla(-q, PI4_A*2, s); + s = mla(-q, PI4_B*2, s); + s = mla(-q, PI4_C*2, s); - t = s; + t = s; - s = s * s; + s = s * s; - u = 1.58938307283228937328511e-10; - u = mla(u, s, -2.50506943502539773349318e-08); - u = mla(u, s, 2.75573131776846360512547e-06); - u = mla(u, s, -0.000198412698278911770864914); - u = mla(u, s, 0.0083333333333191845961746); - u = mla(u, s, -0.166666666666666130709393); - u = u * s * t; + u = 1.58938307283228937328511e-10; + u = mla(u, s, -2.50506943502539773349318e-08); + u = mla(u, s, 2.75573131776846360512547e-06); + u = mla(u, s, -0.000198412698278911770864914); + u = mla(u, s, 0.0083333333333191845961746); + u = mla(u, s, -0.166666666666666130709393); + u = u * s * t; - r.x = t + u; + r.x = t + u; - u = -1.13615350239097429531523e-11; - u = mla(u, s, 2.08757471207040055479366e-09); - u = mla(u, s, -2.75573144028847567498567e-07); - u = mla(u, s, 2.48015872890001867311915e-05); - u = mla(u, s, -0.00138888888888714019282329); - u = mla(u, s, 0.0416666666666665519592062); - u = mla(u, s, -0.5); + u = -1.13615350239097429531523e-11; + u = mla(u, s, 2.08757471207040055479366e-09); + u = mla(u, s, -2.75573144028847567498567e-07); + u = mla(u, s, 2.48015872890001867311915e-05); + u = mla(u, s, -0.00138888888888714019282329); + u = mla(u, s, 0.0416666666666665519592062); + u = mla(u, s, -0.5); - r.y = u * s + 1; + r.y = u * s + 1; - if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 2) != 0) { r.x = -r.x; } - if (((q+1) & 2) != 0) { r.y = -r.y; } + if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 2) != 0) { r.x = -r.x; } + if (((q+1) & 2) != 0) { r.y = -r.y; } - if (xisinf(d)) { r.x = r.y = rtengine::RT_NAN; } + if (xisinf(d)) { r.x = r.y = rtengine::RT_NAN; } - return r; + return r; } __inline double xtan(double d) { - int q; - double u, s, x; + int q; + double u, s, x; - q = (int)xrint(d * (2 * rtengine::RT_1_PI)); + q = (int)xrint(d * (2 * rtengine::RT_1_PI)); - x = mla(q, -PI4_A*2, d); - x = mla(q, -PI4_B*2, x); - x = mla(q, -PI4_C*2, x); + x = mla(q, -PI4_A*2, d); + x = mla(q, -PI4_B*2, x); + x = mla(q, -PI4_C*2, x); - s = x * x; + s = x * x; - if ((q & 1) != 0) x = -x; + if ((q & 1) != 0) x = -x; - u = 1.01419718511083373224408e-05; - u = mla(u, s, -2.59519791585924697698614e-05); - u = mla(u, s, 5.23388081915899855325186e-05); - u = mla(u, s, -3.05033014433946488225616e-05); - u = mla(u, s, 7.14707504084242744267497e-05); - u = mla(u, s, 8.09674518280159187045078e-05); - u = mla(u, s, 0.000244884931879331847054404); - u = mla(u, s, 0.000588505168743587154904506); - u = mla(u, s, 0.00145612788922812427978848); - u = mla(u, s, 0.00359208743836906619142924); - u = mla(u, s, 0.00886323944362401618113356); - u = mla(u, s, 0.0218694882853846389592078); - u = mla(u, s, 0.0539682539781298417636002); - u = mla(u, s, 0.133333333333125941821962); - u = mla(u, s, 0.333333333333334980164153); + u = 1.01419718511083373224408e-05; + u = mla(u, s, -2.59519791585924697698614e-05); + u = mla(u, s, 5.23388081915899855325186e-05); + u = mla(u, s, -3.05033014433946488225616e-05); + u = mla(u, s, 7.14707504084242744267497e-05); + u = mla(u, s, 8.09674518280159187045078e-05); + u = mla(u, s, 0.000244884931879331847054404); + u = mla(u, s, 0.000588505168743587154904506); + u = mla(u, s, 0.00145612788922812427978848); + u = mla(u, s, 0.00359208743836906619142924); + u = mla(u, s, 0.00886323944362401618113356); + u = mla(u, s, 0.0218694882853846389592078); + u = mla(u, s, 0.0539682539781298417636002); + u = mla(u, s, 0.133333333333125941821962); + u = mla(u, s, 0.333333333333334980164153); - u = mla(s, u * x, x); + u = mla(s, u * x, x); - if ((q & 1) != 0) u = 1.0 / u; + if ((q & 1) != 0) u = 1.0 / u; - if (xisinf(d)) u = rtengine::RT_NAN; + if (xisinf(d)) u = rtengine::RT_NAN; - return u; + return u; } __inline double xlog(double d) { - double x, x2, t, m; - int e; + double x, x2, t, m; + int e; - e = ilogbp1(d * 0.7071); - m = ldexpk(d, -e); + e = ilogbp1(d * 0.7071); + m = ldexpk(d, -e); - x = (m-1) / (m+1); - x2 = x * x; + x = (m-1) / (m+1); + x2 = x * x; - t = 0.148197055177935105296783; - t = mla(t, x2, 0.153108178020442575739679); - t = mla(t, x2, 0.181837339521549679055568); - t = mla(t, x2, 0.22222194152736701733275); - t = mla(t, x2, 0.285714288030134544449368); - t = mla(t, x2, 0.399999999989941956712869); - t = mla(t, x2, 0.666666666666685503450651); - t = mla(t, x2, 2); + t = 0.148197055177935105296783; + t = mla(t, x2, 0.153108178020442575739679); + t = mla(t, x2, 0.181837339521549679055568); + t = mla(t, x2, 0.22222194152736701733275); + t = mla(t, x2, 0.285714288030134544449368); + t = mla(t, x2, 0.399999999989941956712869); + t = mla(t, x2, 0.666666666666685503450651); + t = mla(t, x2, 2); - x = x * t + 0.693147180559945286226764 * e; + x = x * t + 0.693147180559945286226764 * e; - if (xisinf(d)) x = rtengine::RT_INFINITY; - if (d < 0) x = rtengine::RT_NAN; - if (d == 0) x = -rtengine::RT_INFINITY; + if (xisinf(d)) x = rtengine::RT_INFINITY; + if (d < 0) x = rtengine::RT_NAN; + if (d == 0) x = -rtengine::RT_INFINITY; - return x; + return x; } __inline double xexp(double d) { - int q = (int)xrint(d * R_LN2); - double s, u; + int q = (int)xrint(d * R_LN2); + double s, u; - s = mla(q, -L2U, d); - s = mla(q, -L2L, s); + s = mla(q, -L2U, d); + s = mla(q, -L2L, s); - u = 2.08860621107283687536341e-09; - u = mla(u, s, 2.51112930892876518610661e-08); - u = mla(u, s, 2.75573911234900471893338e-07); - u = mla(u, s, 2.75572362911928827629423e-06); - u = mla(u, s, 2.4801587159235472998791e-05); - u = mla(u, s, 0.000198412698960509205564975); - u = mla(u, s, 0.00138888888889774492207962); - u = mla(u, s, 0.00833333333331652721664984); - u = mla(u, s, 0.0416666666666665047591422); - u = mla(u, s, 0.166666666666666851703837); - u = mla(u, s, 0.5); + u = 2.08860621107283687536341e-09; + u = mla(u, s, 2.51112930892876518610661e-08); + u = mla(u, s, 2.75573911234900471893338e-07); + u = mla(u, s, 2.75572362911928827629423e-06); + u = mla(u, s, 2.4801587159235472998791e-05); + u = mla(u, s, 0.000198412698960509205564975); + u = mla(u, s, 0.00138888888889774492207962); + u = mla(u, s, 0.00833333333331652721664984); + u = mla(u, s, 0.0416666666666665047591422); + u = mla(u, s, 0.166666666666666851703837); + u = mla(u, s, 0.5); - u = s * s * u + s + 1; - u = ldexpk(u, q); + u = s * s * u + s + 1; + u = ldexpk(u, q); - if (xisminf(d)) u = 0; + if (xisminf(d)) u = 0; - return u; + return u; } __inline double2 logk(double d) { - double2 x, x2; - double m, t; - int e; + double2 x, x2; + double m, t; + int e; - e = ilogbp1(d * 0.7071); - m = ldexpk(d, -e); + e = ilogbp1(d * 0.7071); + m = ldexpk(d, -e); - x = div_dd(add2_ss(-1, m), add2_ss(1, m)); - x2 = squ_d(x); + x = div_dd(add2_ss(-1, m), add2_ss(1, m)); + x2 = squ_d(x); - t = 0.134601987501262130076155; - t = mla(t, x2.x, 0.132248509032032670243288); - t = mla(t, x2.x, 0.153883458318096079652524); - t = mla(t, x2.x, 0.181817427573705403298686); - t = mla(t, x2.x, 0.222222231326187414840781); - t = mla(t, x2.x, 0.285714285651261412873718); - t = mla(t, x2.x, 0.400000000000222439910458); - t = mla(t, x2.x, 0.666666666666666371239645); + t = 0.134601987501262130076155; + t = mla(t, x2.x, 0.132248509032032670243288); + t = mla(t, x2.x, 0.153883458318096079652524); + t = mla(t, x2.x, 0.181817427573705403298686); + t = mla(t, x2.x, 0.222222231326187414840781); + t = mla(t, x2.x, 0.285714285651261412873718); + t = mla(t, x2.x, 0.400000000000222439910458); + t = mla(t, x2.x, 0.666666666666666371239645); - return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e), - add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t))); + return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e), + add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t))); } __inline double expk(double2 d) { - int q = (int)rint((d.x + d.y) * R_LN2); - double2 s, t; - double u; + int q = (int)rint((d.x + d.y) * R_LN2); + double2 s, t; + double u; - s = add2_ds(d, q * -L2U); - s = add2_ds(s, q * -L2L); + s = add2_ds(d, q * -L2U); + s = add2_ds(s, q * -L2L); - s = normalize_d(s); + s = normalize_d(s); - u = 2.51069683420950419527139e-08; - u = mla(u, s.x, 2.76286166770270649116855e-07); - u = mla(u, s.x, 2.75572496725023574143864e-06); - u = mla(u, s.x, 2.48014973989819794114153e-05); - u = mla(u, s.x, 0.000198412698809069797676111); - u = mla(u, s.x, 0.0013888888939977128960529); - u = mla(u, s.x, 0.00833333333332371417601081); - u = mla(u, s.x, 0.0416666666665409524128449); - u = mla(u, s.x, 0.166666666666666740681535); - u = mla(u, s.x, 0.500000000000000999200722); + u = 2.51069683420950419527139e-08; + u = mla(u, s.x, 2.76286166770270649116855e-07); + u = mla(u, s.x, 2.75572496725023574143864e-06); + u = mla(u, s.x, 2.48014973989819794114153e-05); + u = mla(u, s.x, 0.000198412698809069797676111); + u = mla(u, s.x, 0.0013888888939977128960529); + u = mla(u, s.x, 0.00833333333332371417601081); + u = mla(u, s.x, 0.0416666666665409524128449); + u = mla(u, s.x, 0.166666666666666740681535); + u = mla(u, s.x, 0.500000000000000999200722); - t = add_dd(s, mul_ds(squ_d(s), u)); + t = add_dd(s, mul_ds(squ_d(s), u)); - t = add_sd(1, t); - return ldexpk(t.x + t.y, q); + t = add_sd(1, t); + return ldexpk(t.x + t.y, q); } __inline double xpow(double x, double y) { - int yisint = (int)y == y; - int yisodd = (1 & (int)y) != 0 && yisint; + int yisint = (int)y == y; + int yisodd = (1 & (int)y) != 0 && yisint; - double result = expk(mul_ds(logk(xfabs(x)), y)); + double result = expk(mul_ds(logk(xfabs(x)), y)); - result = xisnan(result) ? rtengine::RT_INFINITY : result; - result *= (x >= 0 ? 1 : (!yisint ? rtengine::RT_NAN : (yisodd ? -1 : 1))); + result = xisnan(result) ? rtengine::RT_INFINITY : result; + result *= (x >= 0 ? 1 : (!yisint ? rtengine::RT_NAN : (yisodd ? -1 : 1))); - double efx = mulsign(xfabs(x) - 1, y); - if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : rtengine::RT_INFINITY); - if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : rtengine::RT_INFINITY); - if (xisnan(x) || xisnan(y)) result = rtengine::RT_NAN; - if (y == 0 || x == 1) result = 1; + double efx = mulsign(xfabs(x) - 1, y); + if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : rtengine::RT_INFINITY); + if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : rtengine::RT_INFINITY); + if (xisnan(x) || xisnan(y)) result = rtengine::RT_NAN; + if (y == 0 || x == 1) result = 1; - return result; + return result; } __inline double2 expk2(double2 d) { - int q = (int)rint((d.x + d.y) * R_LN2); - double2 s, t; - double u; + int q = (int)rint((d.x + d.y) * R_LN2); + double2 s, t; + double u; - s = add2_ds(d, q * -L2U); - s = add2_ds(s, q * -L2L); + s = add2_ds(d, q * -L2U); + s = add2_ds(s, q * -L2L); - s = normalize_d(s); + s = normalize_d(s); - u = 2.51069683420950419527139e-08; - u = mla(u, s.x, 2.76286166770270649116855e-07); - u = mla(u, s.x, 2.75572496725023574143864e-06); - u = mla(u, s.x, 2.48014973989819794114153e-05); - u = mla(u, s.x, 0.000198412698809069797676111); - u = mla(u, s.x, 0.0013888888939977128960529); - u = mla(u, s.x, 0.00833333333332371417601081); - u = mla(u, s.x, 0.0416666666665409524128449); - u = mla(u, s.x, 0.166666666666666740681535); - u = mla(u, s.x, 0.500000000000000999200722); + u = 2.51069683420950419527139e-08; + u = mla(u, s.x, 2.76286166770270649116855e-07); + u = mla(u, s.x, 2.75572496725023574143864e-06); + u = mla(u, s.x, 2.48014973989819794114153e-05); + u = mla(u, s.x, 0.000198412698809069797676111); + u = mla(u, s.x, 0.0013888888939977128960529); + u = mla(u, s.x, 0.00833333333332371417601081); + u = mla(u, s.x, 0.0416666666665409524128449); + u = mla(u, s.x, 0.166666666666666740681535); + u = mla(u, s.x, 0.500000000000000999200722); - t = add_dd(s, mul_ds(squ_d(s), u)); + t = add_dd(s, mul_ds(squ_d(s), u)); - t = add_sd(1, t); - return dd(ldexpk(t.x, q), ldexpk(t.y, q)); + t = add_sd(1, t); + return dd(ldexpk(t.x, q), ldexpk(t.y, q)); } __inline double xsinh(double x) { - double y = xfabs(x); - double2 d = expk2(dd(y, 0)); - d = add2_dd(d, div_dd(dd(-1, 0), d)); - y = (d.x + d.y) * 0.5; + double y = xfabs(x); + double2 d = expk2(dd(y, 0)); + d = add2_dd(d, div_dd(dd(-1, 0), d)); + y = (d.x + d.y) * 0.5; - y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; - y = mulsign(y, x); - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; + y = mulsign(y, x); + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } __inline double xcosh(double x) { - double2 d = expk2(dd(x, 0)); - d = add2_dd(d, div_dd(dd(1, 0), d)); - double y = (d.x + d.y) * 0.5; + double2 d = expk2(dd(x, 0)); + d = add2_dd(d, div_dd(dd(1, 0), d)); + double y = (d.x + d.y) * 0.5; - y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } __inline double xtanh(double x) { - double y = xfabs(x); - double2 d = expk2(dd(y, 0)); - double2 e = div_dd(dd(1, 0), d); - d = div_dd(add2_dd(d, scale_d(e, -1)), add2_dd(d, e)); - y = d.x + d.y; + double y = xfabs(x); + double2 d = expk2(dd(y, 0)); + double2 e = div_dd(dd(1, 0), d); + d = div_dd(add2_dd(d, scale_d(e, -1)), add2_dd(d, e)); + y = d.x + d.y; - y = xisinf(x) || xisnan(y) ? 1.0 : y; - y = mulsign(y, x); - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? 1.0 : y; + y = mulsign(y, x); + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } __inline double2 logk2(double2 d) { - double2 x, x2, m; - double t; - int e; + double2 x, x2, m; + double t; + int e; - d = normalize_d(d); - e = ilogbp1(d.x * 0.7071); - m = scale_d(d, ldexpk(1, -e)); + d = normalize_d(d); + e = ilogbp1(d.x * 0.7071); + m = scale_d(d, ldexpk(1, -e)); - x = div_dd(add2_ds(m, -1), add2_ds(m, 1)); - x2 = squ_d(x); + x = div_dd(add2_ds(m, -1), add2_ds(m, 1)); + x2 = squ_d(x); - t = 0.134601987501262130076155; - t = mla(t, x2.x, 0.132248509032032670243288); - t = mla(t, x2.x, 0.153883458318096079652524); - t = mla(t, x2.x, 0.181817427573705403298686); - t = mla(t, x2.x, 0.222222231326187414840781); - t = mla(t, x2.x, 0.285714285651261412873718); - t = mla(t, x2.x, 0.400000000000222439910458); - t = mla(t, x2.x, 0.666666666666666371239645); + t = 0.134601987501262130076155; + t = mla(t, x2.x, 0.132248509032032670243288); + t = mla(t, x2.x, 0.153883458318096079652524); + t = mla(t, x2.x, 0.181817427573705403298686); + t = mla(t, x2.x, 0.222222231326187414840781); + t = mla(t, x2.x, 0.285714285651261412873718); + t = mla(t, x2.x, 0.400000000000222439910458); + t = mla(t, x2.x, 0.666666666666666371239645); - return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e), - add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t))); + return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e), + add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t))); } __inline double xasinh(double x) { - double y = xfabs(x); - double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(y, y), 1)), y)); - y = d.x + d.y; + double y = xfabs(x); + double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(y, y), 1)), y)); + y = d.x + d.y; - y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; - y = mulsign(y, x); - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; + y = mulsign(y, x); + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } __inline double xacosh(double x) { - double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(x, x), -1)), x)); - double y = d.x + d.y; + double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(x, x), -1)), x)); + double y = d.x + d.y; - y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; - y = x == 1.0 ? 0.0 : y; - y = x < 1.0 ? rtengine::RT_NAN : y; - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? rtengine::RT_INFINITY : y; + y = x == 1.0 ? 0.0 : y; + y = x < 1.0 ? rtengine::RT_NAN : y; + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } __inline double xatanh(double x) { - double y = xfabs(x); - double2 d = logk2(div_dd(add2_ss(1, y), add2_ss(1, -y))); - y = y > 1.0 ? rtengine::RT_NAN : (y == 1.0 ? rtengine::RT_INFINITY : (d.x + d.y) * 0.5); + double y = xfabs(x); + double2 d = logk2(div_dd(add2_ss(1, y), add2_ss(1, -y))); + y = y > 1.0 ? rtengine::RT_NAN : (y == 1.0 ? rtengine::RT_INFINITY : (d.x + d.y) * 0.5); - y = xisinf(x) || xisnan(y) ? rtengine::RT_NAN : y; - y = mulsign(y, x); - y = xisnan(x) ? rtengine::RT_NAN : y; + y = xisinf(x) || xisnan(y) ? rtengine::RT_NAN : y; + y = mulsign(y, x); + y = xisnan(x) ? rtengine::RT_NAN : y; - return y; + return y; } // __inline double xfma(double x, double y, double z) { - union { - double f; - long long int i; - } tmp; + union { + double f; + long long int i; + } tmp; - tmp.f = x; - tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL; - double xh = tmp.f, xl = x - xh; + tmp.f = x; + tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL; + double xh = tmp.f, xl = x - xh; - tmp.f = y; - tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL; - double yh = tmp.f, yl = y - yh; + tmp.f = y; + tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL; + double yh = tmp.f, yl = y - yh; - double h = x * y; - double l = xh * yh - h + xl * yh + xh * yl + xl * yl; + double h = x * y; + double l = xh * yh - h + xl * yh + xh * yl + xl * yl; - double h2, l2, v; + double h2, l2, v; - h2 = h + z; - v = h2 - h; - l2 = (h - (h2 - v)) + (z - v) + l; + h2 = h + z; + v = h2 - h; + l2 = (h - (h2 - v)) + (z - v) + l; - return h2 + l2; + return h2 + l2; } __inline double xsqrt(double d) { // max error : 0.5 ulp - double q = 1; + double q = 1; - if (d < 8.636168555094445E-78) { - d *= 1.157920892373162E77; - q = 2.9387358770557188E-39; - } + if (d < 8.636168555094445E-78) { + d *= 1.157920892373162E77; + q = 2.9387358770557188E-39; + } - // http://en.wikipedia.org/wiki/Fast_inverse_square_root - double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1)); + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1)); - x = x * (1.5 - 0.5 * d * x * x); - x = x * (1.5 - 0.5 * d * x * x); - x = x * (1.5 - 0.5 * d * x * x); + x = x * (1.5 - 0.5 * d * x * x); + x = x * (1.5 - 0.5 * d * x * x); + x = x * (1.5 - 0.5 * d * x * x); - // You can change xfma to fma if fma is correctly implemented - x = xfma(d * x, d * x, -d) * (x * -0.5) + d * x; + // You can change xfma to fma if fma is correctly implemented + x = xfma(d * x, d * x, -d) * (x * -0.5) + d * x; - return d == rtengine::RT_INFINITY ? rtengine::RT_INFINITY : x * q; + return d == rtengine::RT_INFINITY ? rtengine::RT_INFINITY : x * q; } __inline double xcbrt(double d) { // max error : 2 ulps - double x, y, q = 1.0; - int e, r; + double x, y, q = 1.0; + int e, r; - e = ilogbp1(d); - d = ldexpk(d, -e); - r = (e + 6144) % 3; - q = (r == 1) ? 1.2599210498948731647672106 : q; - q = (r == 2) ? 1.5874010519681994747517056 : q; - q = ldexpk(q, (e + 6144) / 3 - 2048); + e = ilogbp1(d); + d = ldexpk(d, -e); + r = (e + 6144) % 3; + q = (r == 1) ? 1.2599210498948731647672106 : q; + q = (r == 2) ? 1.5874010519681994747517056 : q; + q = ldexpk(q, (e + 6144) / 3 - 2048); - q = mulsign(q, d); - d = xfabs(d); + q = mulsign(q, d); + d = xfabs(d); - x = -0.640245898480692909870982; - x = x * d + 2.96155103020039511818595; - x = x * d + -5.73353060922947843636166; - x = x * d + 6.03990368989458747961407; - x = x * d + -3.85841935510444988821632; - x = x * d + 2.2307275302496609725722; + x = -0.640245898480692909870982; + x = x * d + 2.96155103020039511818595; + x = x * d + -5.73353060922947843636166; + x = x * d + 6.03990368989458747961407; + x = x * d + -3.85841935510444988821632; + x = x * d + 2.2307275302496609725722; - y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); - y = d * x * x; - y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q; + y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); + y = d * x * x; + y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q; - return y; + return y; } __inline double xexp2(double a) { - double u = expk(mul_ds(dd(0.69314718055994528623, 2.3190468138462995584e-17), a)); - if (xispinf(a)) u = rtengine::RT_INFINITY; - if (xisminf(a)) u = 0; - return u; + double u = expk(mul_ds(dd(0.69314718055994528623, 2.3190468138462995584e-17), a)); + if (xispinf(a)) u = rtengine::RT_INFINITY; + if (xisminf(a)) u = 0; + return u; } __inline double xexp10(double a) { - double u = expk(mul_ds(dd(2.3025850929940459011, -2.1707562233822493508e-16), a)); - if (xispinf(a)) u = rtengine::RT_INFINITY; - if (xisminf(a)) u = 0; - return u; + double u = expk(mul_ds(dd(2.3025850929940459011, -2.1707562233822493508e-16), a)); + if (xispinf(a)) u = rtengine::RT_INFINITY; + if (xisminf(a)) u = 0; + return u; } __inline double xexpm1(double a) { - double2 d = add2_ds(expk2(dd(a, 0)), -1.0); - double x = d.x + d.y; - if (xispinf(a)) x = rtengine::RT_INFINITY; - if (xisminf(a)) x = -1; - return x; + double2 d = add2_ds(expk2(dd(a, 0)), -1.0); + double x = d.x + d.y; + if (xispinf(a)) x = rtengine::RT_INFINITY; + if (xisminf(a)) x = -1; + return x; } __inline double xlog10(double a) { - double2 d = mul_dd(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17)); - double x = d.x + d.y; + double2 d = mul_dd(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17)); + double x = d.x + d.y; - if (xisinf(a)) x = rtengine::RT_INFINITY; - if (a < 0) x = rtengine::RT_NAN; - if (a == 0) x = -rtengine::RT_INFINITY; + if (xisinf(a)) x = rtengine::RT_INFINITY; + if (a < 0) x = rtengine::RT_NAN; + if (a == 0) x = -rtengine::RT_INFINITY; - return x; + return x; } __inline double xlog1p(double a) { - double2 d = logk2(add2_ss(a, 1)); - double x = d.x + d.y; + double2 d = logk2(add2_ss(a, 1)); + double x = d.x + d.y; - if (xisinf(a)) x = rtengine::RT_INFINITY; - if (a < -1) x = rtengine::RT_NAN; - if (a == -1) x = -rtengine::RT_INFINITY; + if (xisinf(a)) x = rtengine::RT_INFINITY; + if (a < -1) x = rtengine::RT_NAN; + if (a == -1) x = -rtengine::RT_INFINITY; - return x; + return x; } /////////////////////////////////////////// @@ -897,29 +897,29 @@ __inline double xlog1p(double a) { #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f __inline int32_t floatToRawIntBits(float d) { - union { - float f; - int32_t i; - } tmp; - tmp.f = d; - return tmp.i; + union { + float f; + int32_t i; + } tmp; + tmp.f = d; + return tmp.i; } __inline float intBitsToFloat(int32_t i) { - union { - float f; - int32_t i; - } tmp; - tmp.i = i; - return tmp.f; + union { + float f; + int32_t i; + } tmp; + tmp.i = i; + return tmp.f; } __inline float xfabsf(float x) { - return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x)); + return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x)); } __inline float mulsignf(float x, float y) { - return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31))); + return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31))); } __inline float signf(float d) { return copysign(1, d); } @@ -931,337 +931,337 @@ __inline int xisminff(float x) { return x == -rtengine::RT_INFINITY_F; } __inline int xispinff(float x) { return x == rtengine::RT_INFINITY_F; } __inline int ilogbp1f(float d) { - int m = d < 5.421010862427522E-20f; - d = m ? 1.8446744073709552E19f * d : d; - int q = (floatToRawIntBits(d) >> 23) & 0xff; - q = m ? q - (64 + 0x7e) : q - 0x7e; - return q; + int m = d < 5.421010862427522E-20f; + d = m ? 1.8446744073709552E19f * d : d; + int q = (floatToRawIntBits(d) >> 23) & 0xff; + q = m ? q - (64 + 0x7e) : q - 0x7e; + return q; } __inline float ldexpkf(float x, int q) { - float u; - int m; - m = q >> 31; - m = (((m + q) >> 6) - m) << 4; - q = q - (m << 2); - u = intBitsToFloat(((int32_t)(m + 0x7f)) << 23); - u = u * u; - x = x * u * u; - u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); - return x * u; + float u; + int m; + m = q >> 31; + m = (((m + q) >> 6) - m) << 4; + q = q - (m << 2); + u = intBitsToFloat(((int32_t)(m + 0x7f)) << 23); + u = u * u; + x = x * u * u; + u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); + return x * u; } __inline float xcbrtf(float d) { // max error : 2 ulps - float x, y, q = 1.0f; - int e, r; + float x, y, q = 1.0f; + int e, r; - e = ilogbp1f(d); - d = ldexpkf(d, -e); - r = (e + 6144) % 3; - q = (r == 1) ? 1.2599210498948731647672106f : q; - q = (r == 2) ? 1.5874010519681994747517056f : q; - q = ldexpkf(q, (e + 6144) / 3 - 2048); + e = ilogbp1f(d); + d = ldexpkf(d, -e); + r = (e + 6144) % 3; + q = (r == 1) ? 1.2599210498948731647672106f : q; + q = (r == 2) ? 1.5874010519681994747517056f : q; + q = ldexpkf(q, (e + 6144) / 3 - 2048); - q = mulsignf(q, d); - d = xfabsf(d); + q = mulsignf(q, d); + d = xfabsf(d); - x = -0.601564466953277587890625f; - x = mlaf(x, d, 2.8208892345428466796875f); - x = mlaf(x, d, -5.532182216644287109375f); - x = mlaf(x, d, 5.898262500762939453125f); - x = mlaf(x, d, -3.8095417022705078125f); - x = mlaf(x, d, 2.2241256237030029296875f); + x = -0.601564466953277587890625f; + x = mlaf(x, d, 2.8208892345428466796875f); + x = mlaf(x, d, -5.532182216644287109375f); + x = mlaf(x, d, 5.898262500762939453125f); + x = mlaf(x, d, -3.8095417022705078125f); + x = mlaf(x, d, 2.2241256237030029296875f); - y = d * x * x; - y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q; + y = d * x * x; + y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q; - return y; + return y; } __inline float xsinf(float d) { - int q; - float u, s; + int q; + float u, s; - q = rint(d * rtengine::RT_1_PI_F); + q = rint(d * rtengine::RT_1_PI_F); - d = mlaf(q, -PI4_Af*4, d); - d = mlaf(q, -PI4_Bf*4, d); - d = mlaf(q, -PI4_Cf*4, d); - d = mlaf(q, -PI4_Df*4, d); + d = mlaf(q, -PI4_Af*4, d); + d = mlaf(q, -PI4_Bf*4, d); + d = mlaf(q, -PI4_Cf*4, d); + d = mlaf(q, -PI4_Df*4, d); - s = d * d; + s = d * d; - if ((q & 1) != 0) d = -d; + if ((q & 1) != 0) d = -d; - u = 2.6083159809786593541503e-06f; - u = mlaf(u, s, -0.0001981069071916863322258f); - u = mlaf(u, s, 0.00833307858556509017944336f); - u = mlaf(u, s, -0.166666597127914428710938f); + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s, -0.0001981069071916863322258f); + u = mlaf(u, s, 0.00833307858556509017944336f); + u = mlaf(u, s, -0.166666597127914428710938f); - u = mlaf(s, u * d, d); + u = mlaf(s, u * d, d); - return u; + return u; } __inline float xcosf(float d) { #ifdef __SSE2__ - // faster than scalar version - return xcosf(_mm_set_ss(d))[0]; + // faster than scalar version + return xcosf(_mm_set_ss(d))[0]; #else - int q; - float u, s; + int q; + float u, s; - q = 1 + 2*rint(d * rtengine::RT_1_PI_F - 0.5f); + q = 1 + 2*rint(d * rtengine::RT_1_PI_F - 0.5f); - d = mlaf(q, -PI4_Af*2, d); - d = mlaf(q, -PI4_Bf*2, d); - d = mlaf(q, -PI4_Cf*2, d); - d = mlaf(q, -PI4_Df*2, d); + d = mlaf(q, -PI4_Af*2, d); + d = mlaf(q, -PI4_Bf*2, d); + d = mlaf(q, -PI4_Cf*2, d); + d = mlaf(q, -PI4_Df*2, d); - s = d * d; + s = d * d; - if ((q & 2) == 0) d = -d; + if ((q & 2) == 0) d = -d; - u = 2.6083159809786593541503e-06f; - u = mlaf(u, s, -0.0001981069071916863322258f); - u = mlaf(u, s, 0.00833307858556509017944336f); - u = mlaf(u, s, -0.166666597127914428710938f); + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s, -0.0001981069071916863322258f); + u = mlaf(u, s, 0.00833307858556509017944336f); + u = mlaf(u, s, -0.166666597127914428710938f); - u = mlaf(s, u * d, d); + u = mlaf(s, u * d, d); - return u; + return u; #endif } __inline float2 xsincosf(float d) { #ifdef __SSE2__ - // faster than scalar version + // faster than scalar version vfloat2 res = xsincosf(_mm_set_ss(d)); return {res.x[0], res.y[0]}; #else - int q; - float u, s, t; - float2 r; + int q; + float u, s, t; + float2 r; - q = rint(d * rtengine::RT_2_PI_F); + q = rint(d * rtengine::RT_2_PI_F); - s = d; + s = d; - s = mlaf(q, -PI4_Af*2, s); - s = mlaf(q, -PI4_Bf*2, s); - s = mlaf(q, -PI4_Cf*2, s); - s = mlaf(q, -PI4_Df*2, s); + s = mlaf(q, -PI4_Af*2, s); + s = mlaf(q, -PI4_Bf*2, s); + s = mlaf(q, -PI4_Cf*2, s); + s = mlaf(q, -PI4_Df*2, s); - t = s; + t = s; - s = s * s; + s = s * s; - u = -0.000195169282960705459117889f; - u = mlaf(u, s, 0.00833215750753879547119141f); - u = mlaf(u, s, -0.166666537523269653320312f); - u = u * s * t; + u = -0.000195169282960705459117889f; + u = mlaf(u, s, 0.00833215750753879547119141f); + u = mlaf(u, s, -0.166666537523269653320312f); + u = u * s * t; - r.x = t + u; + r.x = t + u; - u = -2.71811842367242206819355e-07f; - u = mlaf(u, s, 2.47990446951007470488548e-05f); - u = mlaf(u, s, -0.00138888787478208541870117f); - u = mlaf(u, s, 0.0416666641831398010253906f); - u = mlaf(u, s, -0.5f); + u = -2.71811842367242206819355e-07f; + u = mlaf(u, s, 2.47990446951007470488548e-05f); + u = mlaf(u, s, -0.00138888787478208541870117f); + u = mlaf(u, s, 0.0416666641831398010253906f); + u = mlaf(u, s, -0.5f); - r.y = u * s + 1; + r.y = u * s + 1; - if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 2) != 0) { r.x = -r.x; } - if (((q+1) & 2) != 0) { r.y = -r.y; } + if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 2) != 0) { r.x = -r.x; } + if (((q+1) & 2) != 0) { r.y = -r.y; } - if (xisinff(d)) { r.x = r.y = rtengine::RT_NAN_F; } + if (xisinff(d)) { r.x = r.y = rtengine::RT_NAN_F; } - return r; + return r; #endif } __inline float xtanf(float d) { - int q; - float u, s, x; + int q; + float u, s, x; - q = rint(d * (float)(2 * rtengine::RT_1_PI)); + q = rint(d * (float)(2 * rtengine::RT_1_PI)); - x = d; + x = d; - x = mlaf(q, -PI4_Af*2, x); - x = mlaf(q, -PI4_Bf*2, x); - x = mlaf(q, -PI4_Cf*2, x); - x = mlaf(q, -PI4_Df*2, x); + x = mlaf(q, -PI4_Af*2, x); + x = mlaf(q, -PI4_Bf*2, x); + x = mlaf(q, -PI4_Cf*2, x); + x = mlaf(q, -PI4_Df*2, x); - s = x * x; + s = x * x; - if ((q & 1) != 0) x = -x; + if ((q & 1) != 0) x = -x; - u = 0.00927245803177356719970703f; - u = mlaf(u, s, 0.00331984995864331722259521f); - u = mlaf(u, s, 0.0242998078465461730957031f); - u = mlaf(u, s, 0.0534495301544666290283203f); - u = mlaf(u, s, 0.133383005857467651367188f); - u = mlaf(u, s, 0.333331853151321411132812f); + u = 0.00927245803177356719970703f; + u = mlaf(u, s, 0.00331984995864331722259521f); + u = mlaf(u, s, 0.0242998078465461730957031f); + u = mlaf(u, s, 0.0534495301544666290283203f); + u = mlaf(u, s, 0.133383005857467651367188f); + u = mlaf(u, s, 0.333331853151321411132812f); - u = mlaf(s, u * x, x); + u = mlaf(s, u * x, x); - if ((q & 1) != 0) u = 1.0f / u; + if ((q & 1) != 0) u = 1.0f / u; - if (xisinff(d)) u = rtengine::RT_NAN_F; + if (xisinff(d)) u = rtengine::RT_NAN_F; - return u; + return u; } __inline float xatanf(float s) { - float t, u; - int q = 0; + float t, u; + int q = 0; - if (s < 0) { s = -s; q = 2; } - if (s > 1) { s = 1.0f / s; q |= 1; } + if (s < 0) { s = -s; q = 2; } + if (s > 1) { s = 1.0f / s; q |= 1; } - t = s * s; + t = s * s; - u = 0.00282363896258175373077393f; - u = mlaf(u, t, -0.0159569028764963150024414f); - u = mlaf(u, t, 0.0425049886107444763183594f); - u = mlaf(u, t, -0.0748900920152664184570312f); - u = mlaf(u, t, 0.106347933411598205566406f); - u = mlaf(u, t, -0.142027363181114196777344f); - u = mlaf(u, t, 0.199926957488059997558594f); - u = mlaf(u, t, -0.333331018686294555664062f); + u = 0.00282363896258175373077393f; + u = mlaf(u, t, -0.0159569028764963150024414f); + u = mlaf(u, t, 0.0425049886107444763183594f); + u = mlaf(u, t, -0.0748900920152664184570312f); + u = mlaf(u, t, 0.106347933411598205566406f); + u = mlaf(u, t, -0.142027363181114196777344f); + u = mlaf(u, t, 0.199926957488059997558594f); + u = mlaf(u, t, -0.333331018686294555664062f); - t = s + s * (t * u); + t = s + s * (t * u); - if ((q & 1) != 0) t = 1.570796326794896557998982f - t; - if ((q & 2) != 0) t = -t; + if ((q & 1) != 0) t = 1.570796326794896557998982f - t; + if ((q & 2) != 0) t = -t; - return t; + return t; } __inline float atan2kf(float y, float x) { - float s, t, u; - float q = 0.f; + float s, t, u; + float q = 0.f; - if (x < 0) { x = -x; q = -2.f; } - if (y > x) { t = x; x = y; y = -t; q += 1.f; } + if (x < 0) { x = -x; q = -2.f; } + if (y > x) { t = x; x = y; y = -t; q += 1.f; } - s = y / x; - t = s * s; + s = y / x; + t = s * s; - u = 0.00282363896258175373077393f; - u = mlaf(u, t, -0.0159569028764963150024414f); - u = mlaf(u, t, 0.0425049886107444763183594f); - u = mlaf(u, t, -0.0748900920152664184570312f); - u = mlaf(u, t, 0.106347933411598205566406f); - u = mlaf(u, t, -0.142027363181114196777344f); - u = mlaf(u, t, 0.199926957488059997558594f); - u = mlaf(u, t, -0.333331018686294555664062f); + u = 0.00282363896258175373077393f; + u = mlaf(u, t, -0.0159569028764963150024414f); + u = mlaf(u, t, 0.0425049886107444763183594f); + u = mlaf(u, t, -0.0748900920152664184570312f); + u = mlaf(u, t, 0.106347933411598205566406f); + u = mlaf(u, t, -0.142027363181114196777344f); + u = mlaf(u, t, 0.199926957488059997558594f); + u = mlaf(u, t, -0.333331018686294555664062f); - t = u * t; - t = mlaf(t,s,s); - return mlaf(q,(float)(rtengine::RT_PI_F_2),t); + t = u * t; + t = mlaf(t,s,s); + return mlaf(q,(float)(rtengine::RT_PI_F_2),t); } __inline float xatan2f(float y, float x) { - float r = atan2kf(xfabsf(y), x); + float r = atan2kf(xfabsf(y), x); - r = mulsignf(r, x); - if (xisinff(x) || x == 0) r = rtengine::RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(rtengine::RT_PI_F*.5f)) : 0); - if (xisinff(y) ) r = rtengine::RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(rtengine::RT_PI_F*.25f)) : 0); - if ( y == 0) r = (signf(x) == -1 ? rtengine::RT_PI_F : 0); + r = mulsignf(r, x); + if (xisinff(x) || x == 0) r = rtengine::RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(rtengine::RT_PI_F*.5f)) : 0); + if (xisinff(y) ) r = rtengine::RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(rtengine::RT_PI_F*.25f)) : 0); + if ( y == 0) r = (signf(x) == -1 ? rtengine::RT_PI_F : 0); - return xisnanf(x) || xisnanf(y) ? rtengine::RT_NAN_F : mulsignf(r, y); + return xisnanf(x) || xisnanf(y) ? rtengine::RT_NAN_F : mulsignf(r, y); } __inline float xasinf(float d) { - return mulsignf(atan2kf(fabsf(d), sqrtf((1.0f+d)*(1.0f-d))), d); + return mulsignf(atan2kf(fabsf(d), sqrtf((1.0f+d)*(1.0f-d))), d); } __inline float xacosf(float d) { - return mulsignf(atan2kf(sqrtf((1.0f+d)*(1.0f-d)), fabsf(d)), d) + (d < 0 ? (float)rtengine::RT_PI : 0.0f); + return mulsignf(atan2kf(sqrtf((1.0f+d)*(1.0f-d)), fabsf(d)), d) + (d < 0 ? (float)rtengine::RT_PI : 0.0f); } __inline float xlogf(float d) { - float x, x2, t, m; - int e; + float x, x2, t, m; + int e; - e = ilogbp1f(d * 0.7071f); - m = ldexpkf(d, -e); + e = ilogbp1f(d * 0.7071f); + m = ldexpkf(d, -e); - x = (m-1.0f) / (m+1.0f); - x2 = x * x; + x = (m-1.0f) / (m+1.0f); + x2 = x * x; - t = 0.2371599674224853515625f; - t = mlaf(t, x2, 0.285279005765914916992188f); - t = mlaf(t, x2, 0.400005519390106201171875f); - t = mlaf(t, x2, 0.666666567325592041015625f); - t = mlaf(t, x2, 2.0f); + t = 0.2371599674224853515625f; + t = mlaf(t, x2, 0.285279005765914916992188f); + t = mlaf(t, x2, 0.400005519390106201171875f); + t = mlaf(t, x2, 0.666666567325592041015625f); + t = mlaf(t, x2, 2.0f); - x = x * t + 0.693147180559945286226764f * e; + x = x * t + 0.693147180559945286226764f * e; - if (xisinff(d)) x = rtengine::RT_INFINITY_F; - if (d < 0) x = rtengine::RT_NAN_F; - if (d == 0) x = -rtengine::RT_INFINITY_F; + if (xisinff(d)) x = rtengine::RT_INFINITY_F; + if (d < 0) x = rtengine::RT_NAN_F; + if (d == 0) x = -rtengine::RT_INFINITY_F; - return x; + return x; } __inline float xexpf(float d) { - if(d<=-104.0f) return 0.0f; + if(d<=-104.0f) return 0.0f; - int q = rint(d * R_LN2f); - float s, u; + int q = rint(d * R_LN2f); + float s, u; - s = mlaf(q, -L2Uf, d); - s = mlaf(q, -L2Lf, s); + s = mlaf(q, -L2Uf, d); + s = mlaf(q, -L2Lf, s); - u = 0.00136324646882712841033936f; - u = mlaf(u, s, 0.00836596917361021041870117f); - u = mlaf(u, s, 0.0416710823774337768554688f); - u = mlaf(u, s, 0.166665524244308471679688f); - u = mlaf(u, s, 0.499999850988388061523438f); + u = 0.00136324646882712841033936f; + u = mlaf(u, s, 0.00836596917361021041870117f); + u = mlaf(u, s, 0.0416710823774337768554688f); + u = mlaf(u, s, 0.166665524244308471679688f); + u = mlaf(u, s, 0.499999850988388061523438f); - u = mlaf( s, mlaf(s,u,1.f),1.f); - return ldexpkf(u, q); + u = mlaf( s, mlaf(s,u,1.f),1.f); + return ldexpkf(u, q); } __inline float xmul2f(float d) { - union { - float floatval; - int intval; - } uflint; - uflint.floatval = d; - if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing - uflint.intval += 1 << 23; // add 1 to the exponent - } - return uflint.floatval; + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval += 1 << 23; // add 1 to the exponent + } + return uflint.floatval; } __inline float xdiv2f(float d) { - union { - float floatval; - int intval; - } uflint; - uflint.floatval = d; - if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing - uflint.intval -= 1 << 23; // sub 1 from the exponent - } - return uflint.floatval; + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval -= 1 << 23; // sub 1 from the exponent + } + return uflint.floatval; } __inline float xdivf( float d, int n){ - union { - float floatval; - int intval; - } uflint; - uflint.floatval = d; - if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing - uflint.intval -= n << 23; // add n to the exponent - } - return uflint.floatval; + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval -= n << 23; // add n to the exponent + } + return uflint.floatval; } __inline float xlin2log(float x, float base) diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index cce88df5d..1982c7c4c 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -48,199 +48,199 @@ #define NANf ((float)rtengine::RT_NAN) static INLINE vdouble vadd3(vdouble v0, vdouble v1, vdouble v2) { - return vadd(vadd(v0, v1), v2); + return vadd(vadd(v0, v1), v2); } static INLINE vdouble vadd4(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { - return vadd3(vadd(v0, v1), v2, v3); + return vadd3(vadd(v0, v1), v2, v3); } static INLINE vdouble vadd5(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { - return vadd4(vadd(v0, v1), v2, v3, v4); + return vadd4(vadd(v0, v1), v2, v3, v4); } static INLINE vdouble vadd6(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { - return vadd5(vadd(v0, v1), v2, v3, v4, v5); + return vadd5(vadd(v0, v1), v2, v3, v4, v5); } static INLINE vdouble vadd7(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5, vdouble v6) { - return vadd6(vadd(v0, v1), v2, v3, v4, v5, v6); + return vadd6(vadd(v0, v1), v2, v3, v4, v5, v6); } static INLINE vdouble vsub3(vdouble v0, vdouble v1, vdouble v2) { - return vsub(vsub(v0, v1), v2); + return vsub(vsub(v0, v1), v2); } static INLINE vdouble vsub4(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { - return vsub3(vsub(v0, v1), v2, v3); + return vsub3(vsub(v0, v1), v2, v3); } static INLINE vdouble vsub5(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { - return vsub4(vsub(v0, v1), v2, v3, v4); + return vsub4(vsub(v0, v1), v2, v3, v4); } // static INLINE vdouble2 normalize_d(vdouble2 t) { - vdouble2 s; + vdouble2 s; - s.x = vadd(t.x, t.y); - s.y = vadd(vsub(t.x, s.x), t.y); + s.x = vadd(t.x, t.y); + s.y = vadd(vsub(t.x, s.x), t.y); - return s; + return s; } static INLINE vdouble2 scale_d(vdouble2 d, vdouble s) { - vdouble2 r = {vmul(d.x, s), vmul(d.y, s)}; - return r; + vdouble2 r = {vmul(d.x, s), vmul(d.y, s)}; + return r; } static INLINE vdouble2 add_ss(vdouble x, vdouble y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x, y); - r.y = vadd(vsub(x, r.x), y); + r.x = vadd(x, y); + r.y = vadd(vsub(x, r.x), y); - return r; + return r; } static INLINE vdouble2 add2_ss(vdouble x, vdouble y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x, y); - vdouble v = vsub(r.x, x); - r.y = vadd(vsub(x, vsub(r.x, v)), vsub(y, v)); + r.x = vadd(x, y); + vdouble v = vsub(r.x, x); + r.y = vadd(vsub(x, vsub(r.x, v)), vsub(y, v)); - return r; + return r; } static INLINE vdouble2 add_ds(vdouble2 x, vdouble y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x.x, y); - r.y = vadd3(vsub(x.x, r.x), y, x.y); + r.x = vadd(x.x, y); + r.y = vadd3(vsub(x.x, r.x), y, x.y); - return r; + return r; } static INLINE vdouble2 add2_ds(vdouble2 x, vdouble y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x.x, y); - vdouble v = vsub(r.x, x.x); - r.y = vadd(vsub(x.x, vsub(r.x, v)), vsub(y, v)); - r.y = vadd(r.y, x.y); + r.x = vadd(x.x, y); + vdouble v = vsub(r.x, x.x); + r.y = vadd(vsub(x.x, vsub(r.x, v)), vsub(y, v)); + r.y = vadd(r.y, x.y); - return r; + return r; } static INLINE vdouble2 add_sd(vdouble x, vdouble2 y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x, y.x); - r.y = vadd3(vsub(x, r.x), y.x, y.y); + r.x = vadd(x, y.x); + r.y = vadd3(vsub(x, r.x), y.x, y.y); - return r; + return r; } static INLINE vdouble2 add_dd(vdouble2 x, vdouble2 y) { - // |x| >= |y| + // |x| >= |y| - vdouble2 r; + vdouble2 r; - r.x = vadd(x.x, y.x); - r.y = vadd4(vsub(x.x, r.x), y.x, x.y, y.y); + r.x = vadd(x.x, y.x); + r.y = vadd4(vsub(x.x, r.x), y.x, x.y, y.y); - return r; + return r; } static INLINE vdouble2 add2_dd(vdouble2 x, vdouble2 y) { - vdouble2 r; + vdouble2 r; - r.x = vadd(x.x, y.x); - vdouble v = vsub(r.x, x.x); - r.y = vadd(vsub(x.x, vsub(r.x, v)), vsub(y.x, v)); - r.y = vadd(r.y, vadd(x.y, y.y)); + r.x = vadd(x.x, y.x); + vdouble v = vsub(r.x, x.x); + r.y = vadd(vsub(x.x, vsub(r.x, v)), vsub(y.x, v)); + r.y = vadd(r.y, vadd(x.y, y.y)); - return r; + return r; } static INLINE vdouble2 div_dd(vdouble2 n, vdouble2 d) { - vdouble t = vrec(d.x); - vdouble dh = vupper(d.x), dl = vsub(d.x, dh); - vdouble th = vupper(t ), tl = vsub(t , th); - vdouble nhh = vupper(n.x), nhl = vsub(n.x, nhh); + vdouble t = vrec(d.x); + vdouble dh = vupper(d.x), dl = vsub(d.x, dh); + vdouble th = vupper(t ), tl = vsub(t , th); + vdouble nhh = vupper(n.x), nhl = vsub(n.x, nhh); - vdouble2 q; + vdouble2 q; - q.x = vmul(n.x, t); + q.x = vmul(n.x, t); - vdouble u = vadd5(vsub(vmul(nhh, th), q.x), vmul(nhh, tl), vmul(nhl, th), vmul(nhl, tl), - vmul(q.x, vsub5(vcast_vd_d(1), vmul(dh, th), vmul(dh, tl), vmul(dl, th), vmul(dl, tl)))); + vdouble u = vadd5(vsub(vmul(nhh, th), q.x), vmul(nhh, tl), vmul(nhl, th), vmul(nhl, tl), + vmul(q.x, vsub5(vcast_vd_d(1), vmul(dh, th), vmul(dh, tl), vmul(dl, th), vmul(dl, tl)))); - q.y = vadd(vmul(t, vsub(n.y, vmul(q.x, d.y))), u); + q.y = vadd(vmul(t, vsub(n.y, vmul(q.x, d.y))), u); - return q; + return q; } static INLINE vdouble2 mul_ss(vdouble x, vdouble y) { - vdouble xh = vupper(x), xl = vsub(x, xh); - vdouble yh = vupper(y), yl = vsub(y, yh); - vdouble2 r; + vdouble xh = vupper(x), xl = vsub(x, xh); + vdouble yh = vupper(y), yl = vsub(y, yh); + vdouble2 r; - r.x = vmul(x, y); - r.y = vadd5(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl)); + r.x = vmul(x, y); + r.y = vadd5(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl)); - return r; + return r; } static INLINE vdouble2 mul_ds(vdouble2 x, vdouble y) { - vdouble xh = vupper(x.x), xl = vsub(x.x, xh); - vdouble yh = vupper(y ), yl = vsub(y , yh); - vdouble2 r; + vdouble xh = vupper(x.x), xl = vsub(x.x, xh); + vdouble yh = vupper(y ), yl = vsub(y , yh); + vdouble2 r; - r.x = vmul(x.x, y); - r.y = vadd6(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl), vmul(x.y, y)); + r.x = vmul(x.x, y); + r.y = vadd6(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl), vmul(x.y, y)); - return r; + return r; } static INLINE vdouble2 mul_dd(vdouble2 x, vdouble2 y) { - vdouble xh = vupper(x.x), xl = vsub(x.x, xh); - vdouble yh = vupper(y.x), yl = vsub(y.x, yh); - vdouble2 r; + vdouble xh = vupper(x.x), xl = vsub(x.x, xh); + vdouble yh = vupper(y.x), yl = vsub(y.x, yh); + vdouble2 r; - r.x = vmul(x.x, y.x); - r.y = vadd7(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl), vmul(x.x, y.y), vmul(x.y, y.x)); + r.x = vmul(x.x, y.x); + r.y = vadd7(vmul(xh, yh), vneg(r.x), vmul(xl, yh), vmul(xh, yl), vmul(xl, yl), vmul(x.x, y.y), vmul(x.y, y.x)); - return r; + return r; } static INLINE vdouble2 squ_d(vdouble2 x) { - vdouble xh = vupper(x.x), xl = vsub(x.x, xh); - vdouble2 r; + vdouble xh = vupper(x.x), xl = vsub(x.x, xh); + vdouble2 r; - r.x = vmul(x.x, x.x); - r.y = vadd5(vmul(xh, xh), vneg(r.x), vmul(vadd(xh, xh), xl), vmul(xl, xl), vmul(x.x, vadd(x.y, x.y))); + r.x = vmul(x.x, x.x); + r.y = vadd5(vmul(xh, xh), vneg(r.x), vmul(vadd(xh, xh), xl), vmul(xl, xl), vmul(x.x, vadd(x.y, x.y))); - return r; + return r; } static INLINE vdouble2 rec_s(vdouble d) { - vdouble t = vrec(d); - vdouble dh = vupper(d), dl = vsub(d, dh); - vdouble th = vupper(t), tl = vsub(t, th); - vdouble2 q; + vdouble t = vrec(d); + vdouble dh = vupper(d), dl = vsub(d, dh); + vdouble th = vupper(t), tl = vsub(t, th); + vdouble2 q; - q.x = t; - q.y = vmul(t, vsub5(vcast_vd_d(1), vmul(dh, th), vmul(dh, tl), vmul(dl, th), vmul(dl, tl))); + q.x = t; + q.y = vmul(t, vsub5(vcast_vd_d(1), vmul(dh, th), vmul(dh, tl), vmul(dl, th), vmul(dl, tl))); - return q; + return q; } static INLINE vdouble2 sqrt_d(vdouble2 d) { - vdouble t = vsqrt(vadd(d.x, d.y)); - return scale_d(mul_dd(add2_dd(d, mul_ss(t, t)), rec_s(t)), vcast_vd_d(0.5)); + vdouble t = vsqrt(vadd(d.x, d.y)); + return scale_d(mul_dd(add2_dd(d, mul_ss(t, t)), rec_s(t)), vcast_vd_d(0.5)); } // @@ -248,688 +248,688 @@ static INLINE vdouble2 sqrt_d(vdouble2 d) { static INLINE vdouble xldexp(vdouble x, vint q) { return vldexp(x, q); } static INLINE vint xilogb(vdouble d) { - vdouble e = vcast_vd_vi(vsubi(vilogbp1(vabs(d)), vcast_vi_i(1))); - e = vsel(vmask_eq(d, vcast_vd_d(0)), vcast_vd_d(-2147483648.0), e); - e = vsel(vmask_eq(vabs(d), vcast_vd_d(rtengine::RT_INFINITY)), vcast_vd_d(2147483647), e); - return vrint_vi_vd(e); + vdouble e = vcast_vd_vi(vsubi(vilogbp1(vabs(d)), vcast_vi_i(1))); + e = vsel(vmask_eq(d, vcast_vd_d(0)), vcast_vd_d(-2147483648.0), e); + e = vsel(vmask_eq(vabs(d), vcast_vd_d(rtengine::RT_INFINITY)), vcast_vd_d(2147483647), e); + return vrint_vi_vd(e); } static INLINE vdouble xsin(vdouble d) { - vint q; - vdouble u, s; + vint q; + vdouble u, s; - q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_1_PI))); + q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_1_PI))); - u = vcast_vd_vi(q); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_A*4))); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_B*4))); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_C*4))); + u = vcast_vd_vi(q); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_A*4))); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_B*4))); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_C*4))); - s = vmul(d, d); + s = vmul(d, d); - d = vsel(vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)), vneg(d), d); + d = vsel(vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)), vneg(d), d); - u = vcast_vd_d(-7.97255955009037868891952e-18); - u = vmla(u, s, vcast_vd_d(2.81009972710863200091251e-15)); - u = vmla(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); - u = vmla(u, s, vcast_vd_d(1.60590430605664501629054e-10)); - u = vmla(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); - u = vmla(u, s, vcast_vd_d(2.75573192239198747630416e-06)); - u = vmla(u, s, vcast_vd_d(-0.000198412698412696162806809)); - u = vmla(u, s, vcast_vd_d(0.00833333333333332974823815)); - u = vmla(u, s, vcast_vd_d(-0.166666666666666657414808)); + u = vcast_vd_d(-7.97255955009037868891952e-18); + u = vmla(u, s, vcast_vd_d(2.81009972710863200091251e-15)); + u = vmla(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); + u = vmla(u, s, vcast_vd_d(1.60590430605664501629054e-10)); + u = vmla(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); + u = vmla(u, s, vcast_vd_d(2.75573192239198747630416e-06)); + u = vmla(u, s, vcast_vd_d(-0.000198412698412696162806809)); + u = vmla(u, s, vcast_vd_d(0.00833333333333332974823815)); + u = vmla(u, s, vcast_vd_d(-0.166666666666666657414808)); - u = vmla(s, vmul(u, d), d); + u = vmla(s, vmul(u, d), d); - return u; + return u; } static INLINE vdouble xcos(vdouble d) { - vint q; - vdouble u, s; + vint q; + vdouble u, s; - q = vrint_vi_vd(vsub(vmul(d, vcast_vd_d(rtengine::RT_1_PI)), vcast_vd_d(0.5))); - q = vaddi(vaddi(q, q), vcast_vi_i(1)); + q = vrint_vi_vd(vsub(vmul(d, vcast_vd_d(rtengine::RT_1_PI)), vcast_vd_d(0.5))); + q = vaddi(vaddi(q, q), vcast_vi_i(1)); - u = vcast_vd_vi(q); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_A*2))); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_B*2))); - d = vadd(d, vmul(u, vcast_vd_d(-PI4_C*2))); + u = vcast_vd_vi(q); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_A*2))); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_B*2))); + d = vadd(d, vmul(u, vcast_vd_d(-PI4_C*2))); - s = vmul(d, d); + s = vmul(d, d); - d = vsel(vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(0)), vneg(d), d); + d = vsel(vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(0)), vneg(d), d); - u = vcast_vd_d(-7.97255955009037868891952e-18); - u = vmla(u, s, vcast_vd_d(2.81009972710863200091251e-15)); - u = vmla(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); - u = vmla(u, s, vcast_vd_d(1.60590430605664501629054e-10)); - u = vmla(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); - u = vmla(u, s, vcast_vd_d(2.75573192239198747630416e-06)); - u = vmla(u, s, vcast_vd_d(-0.000198412698412696162806809)); - u = vmla(u, s, vcast_vd_d(0.00833333333333332974823815)); - u = vmla(u, s, vcast_vd_d(-0.166666666666666657414808)); + u = vcast_vd_d(-7.97255955009037868891952e-18); + u = vmla(u, s, vcast_vd_d(2.81009972710863200091251e-15)); + u = vmla(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); + u = vmla(u, s, vcast_vd_d(1.60590430605664501629054e-10)); + u = vmla(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); + u = vmla(u, s, vcast_vd_d(2.75573192239198747630416e-06)); + u = vmla(u, s, vcast_vd_d(-0.000198412698412696162806809)); + u = vmla(u, s, vcast_vd_d(0.00833333333333332974823815)); + u = vmla(u, s, vcast_vd_d(-0.166666666666666657414808)); - u = vmla(s, vmul(u, d), d); + u = vmla(s, vmul(u, d), d); - return u; + return u; } static INLINE vdouble2 xsincos(vdouble d) { - vint q; - vmask m; - vdouble u, s, t, rx, ry; - vdouble2 r; + vint q; + vmask m; + vdouble u, s, t, rx, ry; + vdouble2 r; - q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_2_PI))); + q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_2_PI))); - s = d; + s = d; - u = vcast_vd_vi(q); - s = vmla(u, vcast_vd_d(-PI4_A*2), s); - s = vmla(u, vcast_vd_d(-PI4_B*2), s); - s = vmla(u, vcast_vd_d(-PI4_C*2), s); + u = vcast_vd_vi(q); + s = vmla(u, vcast_vd_d(-PI4_A*2), s); + s = vmla(u, vcast_vd_d(-PI4_B*2), s); + s = vmla(u, vcast_vd_d(-PI4_C*2), s); - t = s; + t = s; - s = vmul(s, s); + s = vmul(s, s); - u = vcast_vd_d(1.58938307283228937328511e-10); - u = vmla(u, s, vcast_vd_d(-2.50506943502539773349318e-08)); - u = vmla(u, s, vcast_vd_d(2.75573131776846360512547e-06)); - u = vmla(u, s, vcast_vd_d(-0.000198412698278911770864914)); - u = vmla(u, s, vcast_vd_d(0.0083333333333191845961746)); - u = vmla(u, s, vcast_vd_d(-0.166666666666666130709393)); - u = vmul(vmul(u, s), t); + u = vcast_vd_d(1.58938307283228937328511e-10); + u = vmla(u, s, vcast_vd_d(-2.50506943502539773349318e-08)); + u = vmla(u, s, vcast_vd_d(2.75573131776846360512547e-06)); + u = vmla(u, s, vcast_vd_d(-0.000198412698278911770864914)); + u = vmla(u, s, vcast_vd_d(0.0083333333333191845961746)); + u = vmla(u, s, vcast_vd_d(-0.166666666666666130709393)); + u = vmul(vmul(u, s), t); - rx = vadd(t, u); + rx = vadd(t, u); - u = vcast_vd_d(-1.13615350239097429531523e-11); - u = vmla(u, s, vcast_vd_d(2.08757471207040055479366e-09)); - u = vmla(u, s, vcast_vd_d(-2.75573144028847567498567e-07)); - u = vmla(u, s, vcast_vd_d(2.48015872890001867311915e-05)); - u = vmla(u, s, vcast_vd_d(-0.00138888888888714019282329)); - u = vmla(u, s, vcast_vd_d(0.0416666666666665519592062)); - u = vmla(u, s, vcast_vd_d(-0.5)); + u = vcast_vd_d(-1.13615350239097429531523e-11); + u = vmla(u, s, vcast_vd_d(2.08757471207040055479366e-09)); + u = vmla(u, s, vcast_vd_d(-2.75573144028847567498567e-07)); + u = vmla(u, s, vcast_vd_d(2.48015872890001867311915e-05)); + u = vmla(u, s, vcast_vd_d(-0.00138888888888714019282329)); + u = vmla(u, s, vcast_vd_d(0.0416666666666665519592062)); + u = vmla(u, s, vcast_vd_d(-0.5)); - ry = vadd(vcast_vd_d(1), vmul(s, u)); + ry = vadd(vcast_vd_d(1), vmul(s, u)); - m = vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(0)); - r.x = vsel(m, rx, ry); - r.y = vsel(m, ry, rx); + m = vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(0)); + r.x = vsel(m, rx, ry); + r.y = vsel(m, ry, rx); - m = vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(2)); - r.x = vreinterpret_vd_vm(vxorm(vandm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); + m = vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(2)); + r.x = vreinterpret_vd_vm(vxorm(vandm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); - m = vmaski_eq(vandi(vaddi(q, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)); - r.y = vreinterpret_vd_vm(vxorm(vandm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); + m = vmaski_eq(vandi(vaddi(q, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)); + r.y = vreinterpret_vd_vm(vxorm(vandm(m, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); - m = vmask_isinf(d); - r.x = vsel(m, vcast_vd_d(rtengine::RT_NAN), r.x); - r.y = vsel(m, vcast_vd_d(rtengine::RT_NAN), r.y); + m = vmask_isinf(d); + r.x = vsel(m, vcast_vd_d(rtengine::RT_NAN), r.x); + r.y = vsel(m, vcast_vd_d(rtengine::RT_NAN), r.y); - return r; + return r; } static INLINE vdouble xtan(vdouble d) { - vint q; - vdouble u, s, x; - vmask m; + vint q; + vdouble u, s, x; + vmask m; - q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_2_PI))); + q = vrint_vi_vd(vmul(d, vcast_vd_d(rtengine::RT_2_PI))); - u = vcast_vd_vi(q); - x = vadd(d, vmul(u, vcast_vd_d(-PI4_A*2))); - x = vadd(x, vmul(u, vcast_vd_d(-PI4_B*2))); - x = vadd(x, vmul(u, vcast_vd_d(-PI4_C*2))); + u = vcast_vd_vi(q); + x = vadd(d, vmul(u, vcast_vd_d(-PI4_A*2))); + x = vadd(x, vmul(u, vcast_vd_d(-PI4_B*2))); + x = vadd(x, vmul(u, vcast_vd_d(-PI4_C*2))); - s = vmul(x, x); + s = vmul(x, x); - m = vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)); - x = vsel(m, vneg(x), x); + m = vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)); + x = vsel(m, vneg(x), x); - u = vcast_vd_d(1.01419718511083373224408e-05); - u = vmla(u, s, vcast_vd_d(-2.59519791585924697698614e-05)); - u = vmla(u, s, vcast_vd_d(5.23388081915899855325186e-05)); - u = vmla(u, s, vcast_vd_d(-3.05033014433946488225616e-05)); - u = vmla(u, s, vcast_vd_d(7.14707504084242744267497e-05)); - u = vmla(u, s, vcast_vd_d(8.09674518280159187045078e-05)); - u = vmla(u, s, vcast_vd_d(0.000244884931879331847054404)); - u = vmla(u, s, vcast_vd_d(0.000588505168743587154904506)); - u = vmla(u, s, vcast_vd_d(0.00145612788922812427978848)); - u = vmla(u, s, vcast_vd_d(0.00359208743836906619142924)); - u = vmla(u, s, vcast_vd_d(0.00886323944362401618113356)); - u = vmla(u, s, vcast_vd_d(0.0218694882853846389592078)); - u = vmla(u, s, vcast_vd_d(0.0539682539781298417636002)); - u = vmla(u, s, vcast_vd_d(0.133333333333125941821962)); - u = vmla(u, s, vcast_vd_d(0.333333333333334980164153)); + u = vcast_vd_d(1.01419718511083373224408e-05); + u = vmla(u, s, vcast_vd_d(-2.59519791585924697698614e-05)); + u = vmla(u, s, vcast_vd_d(5.23388081915899855325186e-05)); + u = vmla(u, s, vcast_vd_d(-3.05033014433946488225616e-05)); + u = vmla(u, s, vcast_vd_d(7.14707504084242744267497e-05)); + u = vmla(u, s, vcast_vd_d(8.09674518280159187045078e-05)); + u = vmla(u, s, vcast_vd_d(0.000244884931879331847054404)); + u = vmla(u, s, vcast_vd_d(0.000588505168743587154904506)); + u = vmla(u, s, vcast_vd_d(0.00145612788922812427978848)); + u = vmla(u, s, vcast_vd_d(0.00359208743836906619142924)); + u = vmla(u, s, vcast_vd_d(0.00886323944362401618113356)); + u = vmla(u, s, vcast_vd_d(0.0218694882853846389592078)); + u = vmla(u, s, vcast_vd_d(0.0539682539781298417636002)); + u = vmla(u, s, vcast_vd_d(0.133333333333125941821962)); + u = vmla(u, s, vcast_vd_d(0.333333333333334980164153)); - u = vmla(s, vmul(u, x), x); + u = vmla(s, vmul(u, x), x); - u = vsel(m, vrec(u), u); + u = vsel(m, vrec(u), u); - u = vsel(vmask_isinf(d), vcast_vd_d(rtengine::RT_NAN), u); + u = vsel(vmask_isinf(d), vcast_vd_d(rtengine::RT_NAN), u); - return u; + return u; } static INLINE vdouble atan2k(vdouble y, vdouble x) { - vdouble s, t, u; - vint q; - vmask p; + vdouble s, t, u; + vint q; + vmask p; - q = vseli_lt(x, vcast_vd_d(0), vcast_vi_i(-2), vcast_vi_i(0)); - x = vabs(x); + q = vseli_lt(x, vcast_vd_d(0), vcast_vi_i(-2), vcast_vi_i(0)); + x = vabs(x); - q = vseli_lt(x, y, vaddi(q, vcast_vi_i(1)), q); - p = vmask_lt(x, y); - s = vsel (p, vneg(x), y); - t = vmax (x, y); + q = vseli_lt(x, y, vaddi(q, vcast_vi_i(1)), q); + p = vmask_lt(x, y); + s = vsel (p, vneg(x), y); + t = vmax (x, y); - s = vdiv(s, t); - t = vmul(s, s); + s = vdiv(s, t); + t = vmul(s, s); - u = vcast_vd_d(-1.88796008463073496563746e-05); - u = vmla(u, t, vcast_vd_d(0.000209850076645816976906797)); - u = vmla(u, t, vcast_vd_d(-0.00110611831486672482563471)); - u = vmla(u, t, vcast_vd_d(0.00370026744188713119232403)); - u = vmla(u, t, vcast_vd_d(-0.00889896195887655491740809)); - u = vmla(u, t, vcast_vd_d(0.016599329773529201970117)); - u = vmla(u, t, vcast_vd_d(-0.0254517624932312641616861)); - u = vmla(u, t, vcast_vd_d(0.0337852580001353069993897)); - u = vmla(u, t, vcast_vd_d(-0.0407629191276836500001934)); - u = vmla(u, t, vcast_vd_d(0.0466667150077840625632675)); - u = vmla(u, t, vcast_vd_d(-0.0523674852303482457616113)); - u = vmla(u, t, vcast_vd_d(0.0587666392926673580854313)); - u = vmla(u, t, vcast_vd_d(-0.0666573579361080525984562)); - u = vmla(u, t, vcast_vd_d(0.0769219538311769618355029)); - u = vmla(u, t, vcast_vd_d(-0.090908995008245008229153)); - u = vmla(u, t, vcast_vd_d(0.111111105648261418443745)); - u = vmla(u, t, vcast_vd_d(-0.14285714266771329383765)); - u = vmla(u, t, vcast_vd_d(0.199999999996591265594148)); - u = vmla(u, t, vcast_vd_d(-0.333333333333311110369124)); + u = vcast_vd_d(-1.88796008463073496563746e-05); + u = vmla(u, t, vcast_vd_d(0.000209850076645816976906797)); + u = vmla(u, t, vcast_vd_d(-0.00110611831486672482563471)); + u = vmla(u, t, vcast_vd_d(0.00370026744188713119232403)); + u = vmla(u, t, vcast_vd_d(-0.00889896195887655491740809)); + u = vmla(u, t, vcast_vd_d(0.016599329773529201970117)); + u = vmla(u, t, vcast_vd_d(-0.0254517624932312641616861)); + u = vmla(u, t, vcast_vd_d(0.0337852580001353069993897)); + u = vmla(u, t, vcast_vd_d(-0.0407629191276836500001934)); + u = vmla(u, t, vcast_vd_d(0.0466667150077840625632675)); + u = vmla(u, t, vcast_vd_d(-0.0523674852303482457616113)); + u = vmla(u, t, vcast_vd_d(0.0587666392926673580854313)); + u = vmla(u, t, vcast_vd_d(-0.0666573579361080525984562)); + u = vmla(u, t, vcast_vd_d(0.0769219538311769618355029)); + u = vmla(u, t, vcast_vd_d(-0.090908995008245008229153)); + u = vmla(u, t, vcast_vd_d(0.111111105648261418443745)); + u = vmla(u, t, vcast_vd_d(-0.14285714266771329383765)); + u = vmla(u, t, vcast_vd_d(0.199999999996591265594148)); + u = vmla(u, t, vcast_vd_d(-0.333333333333311110369124)); - t = vadd(s, vmul(s, vmul(t, u))); - t = vadd(t, vmul(vcast_vd_vi(q), vcast_vd_d(rtengine::RT_PI/2))); + t = vadd(s, vmul(s, vmul(t, u))); + t = vadd(t, vmul(vcast_vd_vi(q), vcast_vd_d(rtengine::RT_PI/2))); - return t; + return t; } static INLINE vdouble xatan2(vdouble y, vdouble x) { - vdouble r = atan2k(vabs(y), x); + vdouble r = atan2k(vabs(y), x); - r = vmulsign(r, x); - r = vsel(vorm(vmask_isinf(x), vmask_eq(x, vcast_vd_d(0))), vsub(vcast_vd_d(rtengine::RT_PI/2), visinf2(x, vmulsign(vcast_vd_d(rtengine::RT_PI/2), x))), r); - r = vsel(vmask_isinf(y), vsub(vcast_vd_d(rtengine::RT_PI/2), visinf2(x, vmulsign(vcast_vd_d(rtengine::RT_PI/4), x))), r); - r = vsel(vmask_eq(y, vcast_vd_d(0)), vsel(vmask_eq(vsign(x), vcast_vd_d(-1.0)), vcast_vd_d(rtengine::RT_PI), vcast_vd_d(0)), r); + r = vmulsign(r, x); + r = vsel(vorm(vmask_isinf(x), vmask_eq(x, vcast_vd_d(0))), vsub(vcast_vd_d(rtengine::RT_PI/2), visinf2(x, vmulsign(vcast_vd_d(rtengine::RT_PI/2), x))), r); + r = vsel(vmask_isinf(y), vsub(vcast_vd_d(rtengine::RT_PI/2), visinf2(x, vmulsign(vcast_vd_d(rtengine::RT_PI/4), x))), r); + r = vsel(vmask_eq(y, vcast_vd_d(0)), vsel(vmask_eq(vsign(x), vcast_vd_d(-1.0)), vcast_vd_d(rtengine::RT_PI), vcast_vd_d(0)), r); - return vsel(vorm(vmask_isnan(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), vmulsign(r, y)); + return vsel(vorm(vmask_isnan(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), vmulsign(r, y)); } static INLINE vdouble xasin(vdouble d) { - vdouble x, y; - x = vadd(vcast_vd_d(1), d); - y = vsub(vcast_vd_d(1), d); - x = vmul(x, y); - x = vsqrt(x); - x = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), atan2k(vabs(d), x)); - return vmulsign(x, d); + vdouble x, y; + x = vadd(vcast_vd_d(1), d); + y = vsub(vcast_vd_d(1), d); + x = vmul(x, y); + x = vsqrt(x); + x = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), atan2k(vabs(d), x)); + return vmulsign(x, d); } static INLINE vdouble xacos(vdouble d) { - vdouble x, y; - x = vadd(vcast_vd_d(1), d); - y = vsub(vcast_vd_d(1), d); - x = vmul(x, y); - x = vsqrt(x); - x = vmulsign(atan2k(x, vabs(d)), d); - y = (vdouble)vandm(vmask_lt(d, vcast_vd_d(0)), (vmask)vcast_vd_d(rtengine::RT_PI)); - x = vadd(x, y); - return x; + vdouble x, y; + x = vadd(vcast_vd_d(1), d); + y = vsub(vcast_vd_d(1), d); + x = vmul(x, y); + x = vsqrt(x); + x = vmulsign(atan2k(x, vabs(d)), d); + y = (vdouble)vandm(vmask_lt(d, vcast_vd_d(0)), (vmask)vcast_vd_d(rtengine::RT_PI)); + x = vadd(x, y); + return x; } static INLINE vdouble xatan(vdouble s) { - vdouble t, u; - vint q; + vdouble t, u; + vint q; - q = vseli_lt(s, vcast_vd_d(0), vcast_vi_i(2), vcast_vi_i(0)); - s = vabs(s); + q = vseli_lt(s, vcast_vd_d(0), vcast_vi_i(2), vcast_vi_i(0)); + s = vabs(s); - q = vseli_lt(vcast_vd_d(1), s, vaddi(q, vcast_vi_i(1)), q); - s = vsel(vmask_lt(vcast_vd_d(1), s), vdiv(vcast_vd_d(1), s), s); + q = vseli_lt(vcast_vd_d(1), s, vaddi(q, vcast_vi_i(1)), q); + s = vsel(vmask_lt(vcast_vd_d(1), s), vdiv(vcast_vd_d(1), s), s); - t = vmul(s, s); + t = vmul(s, s); - u = vcast_vd_d(-1.88796008463073496563746e-05); - u = vmla(u, t, vcast_vd_d(0.000209850076645816976906797)); - u = vmla(u, t, vcast_vd_d(-0.00110611831486672482563471)); - u = vmla(u, t, vcast_vd_d(0.00370026744188713119232403)); - u = vmla(u, t, vcast_vd_d(-0.00889896195887655491740809)); - u = vmla(u, t, vcast_vd_d(0.016599329773529201970117)); - u = vmla(u, t, vcast_vd_d(-0.0254517624932312641616861)); - u = vmla(u, t, vcast_vd_d(0.0337852580001353069993897)); - u = vmla(u, t, vcast_vd_d(-0.0407629191276836500001934)); - u = vmla(u, t, vcast_vd_d(0.0466667150077840625632675)); - u = vmla(u, t, vcast_vd_d(-0.0523674852303482457616113)); - u = vmla(u, t, vcast_vd_d(0.0587666392926673580854313)); - u = vmla(u, t, vcast_vd_d(-0.0666573579361080525984562)); - u = vmla(u, t, vcast_vd_d(0.0769219538311769618355029)); - u = vmla(u, t, vcast_vd_d(-0.090908995008245008229153)); - u = vmla(u, t, vcast_vd_d(0.111111105648261418443745)); - u = vmla(u, t, vcast_vd_d(-0.14285714266771329383765)); - u = vmla(u, t, vcast_vd_d(0.199999999996591265594148)); - u = vmla(u, t, vcast_vd_d(-0.333333333333311110369124)); + u = vcast_vd_d(-1.88796008463073496563746e-05); + u = vmla(u, t, vcast_vd_d(0.000209850076645816976906797)); + u = vmla(u, t, vcast_vd_d(-0.00110611831486672482563471)); + u = vmla(u, t, vcast_vd_d(0.00370026744188713119232403)); + u = vmla(u, t, vcast_vd_d(-0.00889896195887655491740809)); + u = vmla(u, t, vcast_vd_d(0.016599329773529201970117)); + u = vmla(u, t, vcast_vd_d(-0.0254517624932312641616861)); + u = vmla(u, t, vcast_vd_d(0.0337852580001353069993897)); + u = vmla(u, t, vcast_vd_d(-0.0407629191276836500001934)); + u = vmla(u, t, vcast_vd_d(0.0466667150077840625632675)); + u = vmla(u, t, vcast_vd_d(-0.0523674852303482457616113)); + u = vmla(u, t, vcast_vd_d(0.0587666392926673580854313)); + u = vmla(u, t, vcast_vd_d(-0.0666573579361080525984562)); + u = vmla(u, t, vcast_vd_d(0.0769219538311769618355029)); + u = vmla(u, t, vcast_vd_d(-0.090908995008245008229153)); + u = vmla(u, t, vcast_vd_d(0.111111105648261418443745)); + u = vmla(u, t, vcast_vd_d(-0.14285714266771329383765)); + u = vmla(u, t, vcast_vd_d(0.199999999996591265594148)); + u = vmla(u, t, vcast_vd_d(-0.333333333333311110369124)); - t = vadd(s, vmul(s, vmul(t, u))); + t = vadd(s, vmul(s, vmul(t, u))); - t = vsel(vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)), vsub(vcast_vd_d(rtengine::RT_PI/2), t), t); - t = vsel(vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(2)), vneg(t), t); + t = vsel(vmaski_eq(vandi(q, vcast_vi_i(1)), vcast_vi_i(1)), vsub(vcast_vd_d(rtengine::RT_PI/2), t), t); + t = vsel(vmaski_eq(vandi(q, vcast_vi_i(2)), vcast_vi_i(2)), vneg(t), t); - return t; + return t; } static INLINE vdouble xlog(vdouble d) { - vdouble x, x2; - vdouble t, m; - vint e; + vdouble x, x2; + vdouble t, m; + vint e; - e = vilogbp1(vmul(d, vcast_vd_d(0.7071))); - m = vldexp(d, vsubi(vcast_vi_i(0), e)); + e = vilogbp1(vmul(d, vcast_vd_d(0.7071))); + m = vldexp(d, vsubi(vcast_vi_i(0), e)); - x = vdiv(vadd(vcast_vd_d(-1), m), vadd(vcast_vd_d(1), m)); - x2 = vmul(x, x); + x = vdiv(vadd(vcast_vd_d(-1), m), vadd(vcast_vd_d(1), m)); + x2 = vmul(x, x); - t = vcast_vd_d(0.148197055177935105296783); - t = vmla(t, x2, vcast_vd_d(0.153108178020442575739679)); - t = vmla(t, x2, vcast_vd_d(0.181837339521549679055568)); - t = vmla(t, x2, vcast_vd_d(0.22222194152736701733275)); - t = vmla(t, x2, vcast_vd_d(0.285714288030134544449368)); - t = vmla(t, x2, vcast_vd_d(0.399999999989941956712869)); - t = vmla(t, x2, vcast_vd_d(0.666666666666685503450651)); - t = vmla(t, x2, vcast_vd_d(2)); + t = vcast_vd_d(0.148197055177935105296783); + t = vmla(t, x2, vcast_vd_d(0.153108178020442575739679)); + t = vmla(t, x2, vcast_vd_d(0.181837339521549679055568)); + t = vmla(t, x2, vcast_vd_d(0.22222194152736701733275)); + t = vmla(t, x2, vcast_vd_d(0.285714288030134544449368)); + t = vmla(t, x2, vcast_vd_d(0.399999999989941956712869)); + t = vmla(t, x2, vcast_vd_d(0.666666666666685503450651)); + t = vmla(t, x2, vcast_vd_d(2)); - x = vadd(vmul(x, t), vmul(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); + x = vadd(vmul(x, t), vmul(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); - x = vsel(vmask_ispinf(d), vcast_vd_d(rtengine::RT_INFINITY), x); - x = vsel(vmask_gt(vcast_vd_d(0), d), vcast_vd_d(rtengine::RT_NAN), x); - x = vsel(vmask_eq(d, vcast_vd_d(0)), vcast_vd_d(-rtengine::RT_INFINITY), x); + x = vsel(vmask_ispinf(d), vcast_vd_d(rtengine::RT_INFINITY), x); + x = vsel(vmask_gt(vcast_vd_d(0), d), vcast_vd_d(rtengine::RT_NAN), x); + x = vsel(vmask_eq(d, vcast_vd_d(0)), vcast_vd_d(-rtengine::RT_INFINITY), x); - return x; + return x; } static INLINE vdouble xexp(vdouble d) { - vint q = vrint_vi_vd(vmul(d, vcast_vd_d(R_LN2))); - vdouble s, u; + vint q = vrint_vi_vd(vmul(d, vcast_vd_d(R_LN2))); + vdouble s, u; - s = vadd(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); - s = vadd(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); + s = vadd(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); + s = vadd(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); - u = vcast_vd_d(2.08860621107283687536341e-09); - u = vmla(u, s, vcast_vd_d(2.51112930892876518610661e-08)); - u = vmla(u, s, vcast_vd_d(2.75573911234900471893338e-07)); - u = vmla(u, s, vcast_vd_d(2.75572362911928827629423e-06)); - u = vmla(u, s, vcast_vd_d(2.4801587159235472998791e-05)); - u = vmla(u, s, vcast_vd_d(0.000198412698960509205564975)); - u = vmla(u, s, vcast_vd_d(0.00138888888889774492207962)); - u = vmla(u, s, vcast_vd_d(0.00833333333331652721664984)); - u = vmla(u, s, vcast_vd_d(0.0416666666666665047591422)); - u = vmla(u, s, vcast_vd_d(0.166666666666666851703837)); - u = vmla(u, s, vcast_vd_d(0.5)); + u = vcast_vd_d(2.08860621107283687536341e-09); + u = vmla(u, s, vcast_vd_d(2.51112930892876518610661e-08)); + u = vmla(u, s, vcast_vd_d(2.75573911234900471893338e-07)); + u = vmla(u, s, vcast_vd_d(2.75572362911928827629423e-06)); + u = vmla(u, s, vcast_vd_d(2.4801587159235472998791e-05)); + u = vmla(u, s, vcast_vd_d(0.000198412698960509205564975)); + u = vmla(u, s, vcast_vd_d(0.00138888888889774492207962)); + u = vmla(u, s, vcast_vd_d(0.00833333333331652721664984)); + u = vmla(u, s, vcast_vd_d(0.0416666666666665047591422)); + u = vmla(u, s, vcast_vd_d(0.166666666666666851703837)); + u = vmla(u, s, vcast_vd_d(0.5)); - u = vadd(vcast_vd_d(1), vadd(s, vmul(vmul(s, s), u))); + u = vadd(vcast_vd_d(1), vadd(s, vmul(vmul(s, s), u))); - u = vldexp(u, q); + u = vldexp(u, q); - u = vsel(vmask_isminf(d), vcast_vd_d(0), u); + u = vsel(vmask_isminf(d), vcast_vd_d(0), u); - return u; + return u; } static INLINE vdouble2 logk(vdouble d) { - vdouble2 x, x2; - vdouble t, m; - vint e; + vdouble2 x, x2; + vdouble t, m; + vint e; - e = vilogbp1(vmul(d, vcast_vd_d(0.7071))); - m = vldexp(d, vsubi(vcast_vi_i(0), e)); + e = vilogbp1(vmul(d, vcast_vd_d(0.7071))); + m = vldexp(d, vsubi(vcast_vi_i(0), e)); - x = div_dd(add2_ss(vcast_vd_d(-1), m), add2_ss(vcast_vd_d(1), m)); - x2 = squ_d(x); - x2 = normalize_d(x2); + x = div_dd(add2_ss(vcast_vd_d(-1), m), add2_ss(vcast_vd_d(1), m)); + x2 = squ_d(x); + x2 = normalize_d(x2); - t = vcast_vd_d(0.134601987501262130076155); - t = vmla(t, x2.x, vcast_vd_d(0.132248509032032670243288)); - t = vmla(t, x2.x, vcast_vd_d(0.153883458318096079652524)); - t = vmla(t, x2.x, vcast_vd_d(0.181817427573705403298686)); - t = vmla(t, x2.x, vcast_vd_d(0.222222231326187414840781)); - t = vmla(t, x2.x, vcast_vd_d(0.285714285651261412873718)); - t = vmla(t, x2.x, vcast_vd_d(0.400000000000222439910458)); - t = vmla(t, x2.x, vcast_vd_d(0.666666666666666371239645)); + t = vcast_vd_d(0.134601987501262130076155); + t = vmla(t, x2.x, vcast_vd_d(0.132248509032032670243288)); + t = vmla(t, x2.x, vcast_vd_d(0.153883458318096079652524)); + t = vmla(t, x2.x, vcast_vd_d(0.181817427573705403298686)); + t = vmla(t, x2.x, vcast_vd_d(0.222222231326187414840781)); + t = vmla(t, x2.x, vcast_vd_d(0.285714285651261412873718)); + t = vmla(t, x2.x, vcast_vd_d(0.400000000000222439910458)); + t = vmla(t, x2.x, vcast_vd_d(0.666666666666666371239645)); - return add2_dd(mul_ds(dd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), - vcast_vd_vi(e)), - add2_dd(scale_d(x, vcast_vd_d(2)), mul_ds(mul_dd(x2, x), t))); + return add2_dd(mul_ds(dd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), + vcast_vd_vi(e)), + add2_dd(scale_d(x, vcast_vd_d(2)), mul_ds(mul_dd(x2, x), t))); } static INLINE vdouble expk(vdouble2 d) { - vdouble u = vmul(vadd(d.x, d.y), vcast_vd_d(R_LN2)); - vint q = vrint_vi_vd(u); - vdouble2 s, t; + vdouble u = vmul(vadd(d.x, d.y), vcast_vd_d(R_LN2)); + vint q = vrint_vi_vd(u); + vdouble2 s, t; - s = add2_ds(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); - s = add2_ds(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); + s = add2_ds(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); + s = add2_ds(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); - q = vrint_vi_vd(vmin(vmax(vcast_vd_d(-2047.49), u), vcast_vd_d(2047.49))); + q = vrint_vi_vd(vmin(vmax(vcast_vd_d(-2047.49), u), vcast_vd_d(2047.49))); - s = normalize_d(s); + s = normalize_d(s); - u = vcast_vd_d(2.51069683420950419527139e-08); - u = vmla(u, s.x, vcast_vd_d(2.76286166770270649116855e-07)); - u = vmla(u, s.x, vcast_vd_d(2.75572496725023574143864e-06)); - u = vmla(u, s.x, vcast_vd_d(2.48014973989819794114153e-05)); - u = vmla(u, s.x, vcast_vd_d(0.000198412698809069797676111)); - u = vmla(u, s.x, vcast_vd_d(0.0013888888939977128960529)); - u = vmla(u, s.x, vcast_vd_d(0.00833333333332371417601081)); - u = vmla(u, s.x, vcast_vd_d(0.0416666666665409524128449)); - u = vmla(u, s.x, vcast_vd_d(0.166666666666666740681535)); - u = vmla(u, s.x, vcast_vd_d(0.500000000000000999200722)); + u = vcast_vd_d(2.51069683420950419527139e-08); + u = vmla(u, s.x, vcast_vd_d(2.76286166770270649116855e-07)); + u = vmla(u, s.x, vcast_vd_d(2.75572496725023574143864e-06)); + u = vmla(u, s.x, vcast_vd_d(2.48014973989819794114153e-05)); + u = vmla(u, s.x, vcast_vd_d(0.000198412698809069797676111)); + u = vmla(u, s.x, vcast_vd_d(0.0013888888939977128960529)); + u = vmla(u, s.x, vcast_vd_d(0.00833333333332371417601081)); + u = vmla(u, s.x, vcast_vd_d(0.0416666666665409524128449)); + u = vmla(u, s.x, vcast_vd_d(0.166666666666666740681535)); + u = vmla(u, s.x, vcast_vd_d(0.500000000000000999200722)); - t = add_dd(s, mul_ds(squ_d(s), u)); + t = add_dd(s, mul_ds(squ_d(s), u)); - t = add_sd(vcast_vd_d(1), t); - u = vadd(t.x, t.y); - u = vldexp(u, q); + t = add_sd(vcast_vd_d(1), t); + u = vadd(t.x, t.y); + u = vldexp(u, q); - return u; + return u; } static INLINE vdouble xpow(vdouble x, vdouble y) { #if 1 - vmask yisint = vmask_eq(vcast_vd_vi(vrint_vi_vd(y)), y); - vmask yisodd = vandm(vmaski_eq(vandi(vrint_vi_vd(y), vcast_vi_i(1)), vcast_vi_i(1)), yisint); + vmask yisint = vmask_eq(vcast_vd_vi(vrint_vi_vd(y)), y); + vmask yisodd = vandm(vmaski_eq(vandi(vrint_vi_vd(y), vcast_vi_i(1)), vcast_vi_i(1)), yisint); - vdouble result = expk(mul_ds(logk(vabs(x)), y)); + vdouble result = expk(mul_ds(logk(vabs(x)), y)); - //result = vsel(vmask_isnan(result), vcast_vd_d(rtengine::RT_INFINITY), result); + //result = vsel(vmask_isnan(result), vcast_vd_d(rtengine::RT_INFINITY), result); - result = vmul(result, - vsel(vmask_gt(x, vcast_vd_d(0)), - vcast_vd_d(1), - vsel(yisint, - vsel(yisodd, - vcast_vd_d(-1), - vcast_vd_d(1)), - vcast_vd_d(rtengine::RT_NAN)))); + result = vmul(result, + vsel(vmask_gt(x, vcast_vd_d(0)), + vcast_vd_d(1), + vsel(yisint, + vsel(yisodd, + vcast_vd_d(-1), + vcast_vd_d(1)), + vcast_vd_d(rtengine::RT_NAN)))); - vdouble efx = vreinterpret_vd_vm(vxorm(vreinterpret_vm_vd(vsub(vabs(x), vcast_vd_d(1))), vsignbit(y))); + vdouble efx = vreinterpret_vd_vm(vxorm(vreinterpret_vm_vd(vsub(vabs(x), vcast_vd_d(1))), vsignbit(y))); - result = vsel(vmask_isinf(y), - vsel(vmask_lt(efx, vcast_vd_d(0)), - vcast_vd_d(0), - vsel(vmask_eq(efx, vcast_vd_d(0)), - vcast_vd_d(1.0), - vcast_vd_d(rtengine::RT_INFINITY))), - result); + result = vsel(vmask_isinf(y), + vsel(vmask_lt(efx, vcast_vd_d(0)), + vcast_vd_d(0), + vsel(vmask_eq(efx, vcast_vd_d(0)), + vcast_vd_d(1.0), + vcast_vd_d(rtengine::RT_INFINITY))), + result); - result = vsel(vorm(vmask_isinf(x), vmask_eq(x, vcast_vd_d(0))), - vmul(vsel(yisodd, vsign(x), vcast_vd_d(1)), - vsel(vmask_lt(vsel(vmask_eq(x, vcast_vd_d(0)), vneg(y), y), vcast_vd_d(0)), - vcast_vd_d(0), - vcast_vd_d(rtengine::RT_INFINITY))), - result); + result = vsel(vorm(vmask_isinf(x), vmask_eq(x, vcast_vd_d(0))), + vmul(vsel(yisodd, vsign(x), vcast_vd_d(1)), + vsel(vmask_lt(vsel(vmask_eq(x, vcast_vd_d(0)), vneg(y), y), vcast_vd_d(0)), + vcast_vd_d(0), + vcast_vd_d(rtengine::RT_INFINITY))), + result); - result = vsel(vorm(vmask_isnan(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), result); + result = vsel(vorm(vmask_isnan(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), result); - result = vsel(vorm(vmask_eq(y, vcast_vd_d(0)), vmask_eq(x, vcast_vd_d(1))), vcast_vd_d(1), result); + result = vsel(vorm(vmask_eq(y, vcast_vd_d(0)), vmask_eq(x, vcast_vd_d(1))), vcast_vd_d(1), result); - return result; + return result; #else - return expk(mul_ds(logk(x), y)); + return expk(mul_ds(logk(x), y)); #endif } static INLINE vdouble2 expk2(vdouble2 d) { - vdouble u = vmul(vadd(d.x, d.y), vcast_vd_d(R_LN2)); - vint q = vrint_vi_vd(u); - vdouble2 s, t; + vdouble u = vmul(vadd(d.x, d.y), vcast_vd_d(R_LN2)); + vint q = vrint_vi_vd(u); + vdouble2 s, t; - s = add2_ds(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); - s = add2_ds(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); + s = add2_ds(d, vmul(vcast_vd_vi(q), vcast_vd_d(-L2U))); + s = add2_ds(s, vmul(vcast_vd_vi(q), vcast_vd_d(-L2L))); - q = vrint_vi_vd(vmin(vmax(vcast_vd_d(-2047.49), u), vcast_vd_d(2047.49))); + q = vrint_vi_vd(vmin(vmax(vcast_vd_d(-2047.49), u), vcast_vd_d(2047.49))); - s = normalize_d(s); + s = normalize_d(s); - u = vcast_vd_d(2.51069683420950419527139e-08); - u = vmla(u, s.x, vcast_vd_d(2.76286166770270649116855e-07)); - u = vmla(u, s.x, vcast_vd_d(2.75572496725023574143864e-06)); - u = vmla(u, s.x, vcast_vd_d(2.48014973989819794114153e-05)); - u = vmla(u, s.x, vcast_vd_d(0.000198412698809069797676111)); - u = vmla(u, s.x, vcast_vd_d(0.0013888888939977128960529)); - u = vmla(u, s.x, vcast_vd_d(0.00833333333332371417601081)); - u = vmla(u, s.x, vcast_vd_d(0.0416666666665409524128449)); - u = vmla(u, s.x, vcast_vd_d(0.166666666666666740681535)); - u = vmla(u, s.x, vcast_vd_d(0.500000000000000999200722)); + u = vcast_vd_d(2.51069683420950419527139e-08); + u = vmla(u, s.x, vcast_vd_d(2.76286166770270649116855e-07)); + u = vmla(u, s.x, vcast_vd_d(2.75572496725023574143864e-06)); + u = vmla(u, s.x, vcast_vd_d(2.48014973989819794114153e-05)); + u = vmla(u, s.x, vcast_vd_d(0.000198412698809069797676111)); + u = vmla(u, s.x, vcast_vd_d(0.0013888888939977128960529)); + u = vmla(u, s.x, vcast_vd_d(0.00833333333332371417601081)); + u = vmla(u, s.x, vcast_vd_d(0.0416666666665409524128449)); + u = vmla(u, s.x, vcast_vd_d(0.166666666666666740681535)); + u = vmla(u, s.x, vcast_vd_d(0.500000000000000999200722)); - t = add_dd(s, mul_ds(squ_d(s), u)); + t = add_dd(s, mul_ds(squ_d(s), u)); - t = add_sd(vcast_vd_d(1), t); + t = add_sd(vcast_vd_d(1), t); - return dd(vldexp(t.x, q), vldexp(t.y, q)); + return dd(vldexp(t.x, q), vldexp(t.y, q)); } static INLINE vdouble xsinh(vdouble x) { - vdouble y = vabs(x); - vdouble2 d = expk2(dd(y, vcast_vd_d(0))); - d = add2_dd(d, div_dd(dd(vcast_vd_d(-1), vcast_vd_d(0)), d)); - y = vmul(vadd(d.x, d.y), vcast_vd_d(0.5)); + vdouble y = vabs(x); + vdouble2 d = expk2(dd(y, vcast_vd_d(0))); + d = add2_dd(d, div_dd(dd(vcast_vd_d(-1), vcast_vd_d(0)), d)); + y = vmul(vadd(d.x, d.y), vcast_vd_d(0.5)); - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); - y = vmulsign(y, x); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); + y = vmulsign(y, x); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble xcosh(vdouble x) { - vdouble2 d = expk2(dd(x, vcast_vd_d(0))); - d = add2_dd(d, div_dd(dd(vcast_vd_d(1), vcast_vd_d(0)), d)); - vdouble y = vmul(vadd(d.x, d.y), vcast_vd_d(0.5)); + vdouble2 d = expk2(dd(x, vcast_vd_d(0))); + d = add2_dd(d, div_dd(dd(vcast_vd_d(1), vcast_vd_d(0)), d)); + vdouble y = vmul(vadd(d.x, d.y), vcast_vd_d(0.5)); - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble xtanh(vdouble x) { - vdouble y = vabs(x); - vdouble2 d = expk2(dd(y, vcast_vd_d(0))); - vdouble2 e = div_dd(dd(vcast_vd_d(1), vcast_vd_d(0)), d); - d = div_dd(add2_dd(d, scale_d(e, vcast_vd_d(-1))), add2_dd(d, e)); - y = d.x + d.y; + vdouble y = vabs(x); + vdouble2 d = expk2(dd(y, vcast_vd_d(0))); + vdouble2 e = div_dd(dd(vcast_vd_d(1), vcast_vd_d(0)), d); + d = div_dd(add2_dd(d, scale_d(e, vcast_vd_d(-1))), add2_dd(d, e)); + y = d.x + d.y; - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(1.0), y); - y = vmulsign(y, x); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(1.0), y); + y = vmulsign(y, x); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble2 logk2(vdouble2 d) { - vdouble2 x, x2, m; - vdouble t; - vint e; + vdouble2 x, x2, m; + vdouble t; + vint e; - d = normalize_d(d); - e = vilogbp1(vmul(d.x, vcast_vd_d(0.7071))); - m = scale_d(d, vldexp(vcast_vd_d(1), vsubi(vcast_vi_i(0), e))); + d = normalize_d(d); + e = vilogbp1(vmul(d.x, vcast_vd_d(0.7071))); + m = scale_d(d, vldexp(vcast_vd_d(1), vsubi(vcast_vi_i(0), e))); - x = div_dd(add2_ds(m, vcast_vd_d(-1)), add2_ds(m, vcast_vd_d(1))); - x2 = squ_d(x); - x2 = normalize_d(x2); + x = div_dd(add2_ds(m, vcast_vd_d(-1)), add2_ds(m, vcast_vd_d(1))); + x2 = squ_d(x); + x2 = normalize_d(x2); - t = vcast_vd_d(0.134601987501262130076155); - t = vmla(t, x2.x, vcast_vd_d(0.132248509032032670243288)); - t = vmla(t, x2.x, vcast_vd_d(0.153883458318096079652524)); - t = vmla(t, x2.x, vcast_vd_d(0.181817427573705403298686)); - t = vmla(t, x2.x, vcast_vd_d(0.222222231326187414840781)); - t = vmla(t, x2.x, vcast_vd_d(0.285714285651261412873718)); - t = vmla(t, x2.x, vcast_vd_d(0.400000000000222439910458)); - t = vmla(t, x2.x, vcast_vd_d(0.666666666666666371239645)); + t = vcast_vd_d(0.134601987501262130076155); + t = vmla(t, x2.x, vcast_vd_d(0.132248509032032670243288)); + t = vmla(t, x2.x, vcast_vd_d(0.153883458318096079652524)); + t = vmla(t, x2.x, vcast_vd_d(0.181817427573705403298686)); + t = vmla(t, x2.x, vcast_vd_d(0.222222231326187414840781)); + t = vmla(t, x2.x, vcast_vd_d(0.285714285651261412873718)); + t = vmla(t, x2.x, vcast_vd_d(0.400000000000222439910458)); + t = vmla(t, x2.x, vcast_vd_d(0.666666666666666371239645)); - return add2_dd(mul_ds(dd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), - vcast_vd_vi(e)), - add2_dd(scale_d(x, vcast_vd_d(2)), mul_ds(mul_dd(x2, x), t))); + return add2_dd(mul_ds(dd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), + vcast_vd_vi(e)), + add2_dd(scale_d(x, vcast_vd_d(2)), mul_ds(mul_dd(x2, x), t))); } static INLINE vdouble xasinh(vdouble x) { - vdouble y = vabs(x); - vdouble2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(y, y), vcast_vd_d(1))), y)); - y = vadd(d.x, d.y); + vdouble y = vabs(x); + vdouble2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(y, y), vcast_vd_d(1))), y)); + y = vadd(d.x, d.y); - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); - y = vmulsign(y, x); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); + y = vmulsign(y, x); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble xacosh(vdouble x) { - vdouble2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(x, x), vcast_vd_d(-1))), x)); - vdouble y = vadd(d.x, d.y); + vdouble2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(x, x), vcast_vd_d(-1))), x)); + vdouble y = vadd(d.x, d.y); - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); - y = vsel(vmask_eq(x, vcast_vd_d(1.0)), vcast_vd_d(0.0), y); - y = vsel(vmask_lt(x, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_NAN), y); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_INFINITY), y); + y = vsel(vmask_eq(x, vcast_vd_d(1.0)), vcast_vd_d(0.0), y); + y = vsel(vmask_lt(x, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble xatanh(vdouble x) { - vdouble y = vabs(x); - vdouble2 d = logk2(div_dd(add2_ss(vcast_vd_d(1), y), add2_ss(vcast_vd_d(1), -y))); - y = vsel(vmask_gt(y, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_NAN), vsel(vmask_eq(y, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_INFINITY), vmul(vadd(d.x, d.y), vcast_vd_d(0.5)))); + vdouble y = vabs(x); + vdouble2 d = logk2(div_dd(add2_ss(vcast_vd_d(1), y), add2_ss(vcast_vd_d(1), -y))); + y = vsel(vmask_gt(y, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_NAN), vsel(vmask_eq(y, vcast_vd_d(1.0)), vcast_vd_d(rtengine::RT_INFINITY), vmul(vadd(d.x, d.y), vcast_vd_d(0.5)))); - y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), y); - y = vmulsign(y, x); - y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); + y = vsel(vorm(vmask_isinf(x), vmask_isnan(y)), vcast_vd_d(rtengine::RT_NAN), y); + y = vmulsign(y, x); + y = vsel(vmask_isnan(x), vcast_vd_d(rtengine::RT_NAN), y); - return y; + return y; } static INLINE vdouble xcbrt(vdouble d) { - vdouble x, y, q = vcast_vd_d(1.0); - vint e, qu, re; - vdouble t; + vdouble x, y, q = vcast_vd_d(1.0); + vint e, qu, re; + vdouble t; - e = vilogbp1(vabs(d)); - d = vldexp(d, vsubi(vcast_vi_i(0), e)); + e = vilogbp1(vabs(d)); + d = vldexp(d, vsubi(vcast_vi_i(0), e)); - t = vadd(vcast_vd_vi(e), vcast_vd_d(6144)); - qu = vtruncate_vi_vd(vdiv(t, vcast_vd_d(3))); - re = vtruncate_vi_vd(vsub(t, vmul(vcast_vd_vi(qu), vcast_vd_d(3)))); + t = vadd(vcast_vd_vi(e), vcast_vd_d(6144)); + qu = vtruncate_vi_vd(vdiv(t, vcast_vd_d(3))); + re = vtruncate_vi_vd(vsub(t, vmul(vcast_vd_vi(qu), vcast_vd_d(3)))); - q = vsel(vmaski_eq(re, vcast_vi_i(1)), vcast_vd_d(1.2599210498948731647672106), q); - q = vsel(vmaski_eq(re, vcast_vi_i(2)), vcast_vd_d(1.5874010519681994747517056), q); - q = vldexp(q, vsubi(qu, vcast_vi_i(2048))); + q = vsel(vmaski_eq(re, vcast_vi_i(1)), vcast_vd_d(1.2599210498948731647672106), q); + q = vsel(vmaski_eq(re, vcast_vi_i(2)), vcast_vd_d(1.5874010519681994747517056), q); + q = vldexp(q, vsubi(qu, vcast_vi_i(2048))); - q = vmulsign(q, d); + q = vmulsign(q, d); - d = vabs(d); + d = vabs(d); - x = vcast_vd_d(-0.640245898480692909870982); - x = vmla(x, d, vcast_vd_d(2.96155103020039511818595)); - x = vmla(x, d, vcast_vd_d(-5.73353060922947843636166)); - x = vmla(x, d, vcast_vd_d(6.03990368989458747961407)); - x = vmla(x, d, vcast_vd_d(-3.85841935510444988821632)); - x = vmla(x, d, vcast_vd_d(2.2307275302496609725722)); + x = vcast_vd_d(-0.640245898480692909870982); + x = vmla(x, d, vcast_vd_d(2.96155103020039511818595)); + x = vmla(x, d, vcast_vd_d(-5.73353060922947843636166)); + x = vmla(x, d, vcast_vd_d(6.03990368989458747961407)); + x = vmla(x, d, vcast_vd_d(-3.85841935510444988821632)); + x = vmla(x, d, vcast_vd_d(2.2307275302496609725722)); - y = vmul(x, x); y = vmul(y, y); x = vsub(x, vmul(vmla(d, y, vneg(x)), vcast_vd_d(1.0 / 3.0))); - y = vmul(vmul(d, x), x); - y = vmul(vsub(y, vmul(vmul(vcast_vd_d(2.0 / 3.0), y), vmla(y, x, vcast_vd_d(-1.0)))), q); + y = vmul(x, x); y = vmul(y, y); x = vsub(x, vmul(vmla(d, y, vneg(x)), vcast_vd_d(1.0 / 3.0))); + y = vmul(vmul(d, x), x); + y = vmul(vsub(y, vmul(vmul(vcast_vd_d(2.0 / 3.0), y), vmla(y, x, vcast_vd_d(-1.0)))), q); - return y; + return y; } static INLINE vdouble xexp2(vdouble a) { - vdouble u = expk(mul_ds(dd(vcast_vd_d(0.69314718055994528623), vcast_vd_d(2.3190468138462995584e-17)), a)); - u = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), u); - u = vsel(vmask_isminf(a), vcast_vd_d(0), u); - return u; + vdouble u = expk(mul_ds(dd(vcast_vd_d(0.69314718055994528623), vcast_vd_d(2.3190468138462995584e-17)), a)); + u = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), u); + u = vsel(vmask_isminf(a), vcast_vd_d(0), u); + return u; } static INLINE vdouble xexp10(vdouble a) { - vdouble u = expk(mul_ds(dd(vcast_vd_d(2.3025850929940459011), vcast_vd_d(-2.1707562233822493508e-16)), a)); - u = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), u); - u = vsel(vmask_isminf(a), vcast_vd_d(0), u); - return u; + vdouble u = expk(mul_ds(dd(vcast_vd_d(2.3025850929940459011), vcast_vd_d(-2.1707562233822493508e-16)), a)); + u = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), u); + u = vsel(vmask_isminf(a), vcast_vd_d(0), u); + return u; } static INLINE vdouble xexpm1(vdouble a) { - vdouble2 d = add2_ds(expk2(dd(a, vcast_vd_d(0))), vcast_vd_d(-1.0)); - vdouble x = d.x + d.y; - x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); - x = vsel(vmask_isminf(a), vcast_vd_d(-1), x); - return x; + vdouble2 d = add2_ds(expk2(dd(a, vcast_vd_d(0))), vcast_vd_d(-1.0)); + vdouble x = d.x + d.y; + x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); + x = vsel(vmask_isminf(a), vcast_vd_d(-1), x); + return x; } static INLINE vdouble xlog10(vdouble a) { - vdouble2 d = mul_dd(logk(a), dd(vcast_vd_d(0.43429448190325176116), vcast_vd_d(6.6494347733425473126e-17))); - vdouble x = d.x + d.y; + vdouble2 d = mul_dd(logk(a), dd(vcast_vd_d(0.43429448190325176116), vcast_vd_d(6.6494347733425473126e-17))); + vdouble x = d.x + d.y; - x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); - x = vsel(vmask_gt(vcast_vd_d(0), a), vcast_vd_d(rtengine::RT_NAN), x); - x = vsel(vmask_eq(a, vcast_vd_d(0)), vcast_vd_d(-rtengine::RT_INFINITY), x); + x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); + x = vsel(vmask_gt(vcast_vd_d(0), a), vcast_vd_d(rtengine::RT_NAN), x); + x = vsel(vmask_eq(a, vcast_vd_d(0)), vcast_vd_d(-rtengine::RT_INFINITY), x); - return x; + return x; } static INLINE vdouble xlog1p(vdouble a) { - vdouble2 d = logk2(add2_ss(a, vcast_vd_d(1))); - vdouble x = d.x + d.y; + vdouble2 d = logk2(add2_ss(a, vcast_vd_d(1))); + vdouble x = d.x + d.y; - x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); - x = vsel(vmask_gt(vcast_vd_d(-1), a), vcast_vd_d(rtengine::RT_NAN), x); - x = vsel(vmask_eq(a, vcast_vd_d(-1)), vcast_vd_d(-rtengine::RT_INFINITY), x); + x = vsel(vmask_ispinf(a), vcast_vd_d(rtengine::RT_INFINITY), x); + x = vsel(vmask_gt(vcast_vd_d(-1), a), vcast_vd_d(rtengine::RT_NAN), x); + x = vsel(vmask_eq(a, vcast_vd_d(-1)), vcast_vd_d(-rtengine::RT_INFINITY), x); - return x; + return x; } // typedef struct { - vfloat x, y; + vfloat x, y; } vfloat2; static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); } static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); } #ifdef __SSE4_1__ - // only one instruction when using SSE4.1 - static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) { - return _mm_blendv_ps(y,x,(vfloat)mask); - } +// only one instruction when using SSE4.1 +static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) { + return _mm_blendv_ps(y,x,(vfloat)mask); +} - static INLINE vint vselc(vmask mask, vint x, vint y) { - return _mm_blendv_epi8(y,x,mask); - } +static INLINE vint vselc(vmask mask, vint x, vint y) { + return _mm_blendv_epi8(y,x,mask); +} #else - // three instructions when using SSE2 - static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) { - return (vfloat)vorm(vandm(mask, (vmask)x), vandnotm(mask, (vmask)y)); - } +// three instructions when using SSE2 +static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) { + return (vfloat)vorm(vandm(mask, (vmask)x), vandnotm(mask, (vmask)y)); +} - static INLINE vint vselc(vmask mask, vint x, vint y) { - return vorm(vandm(mask, (vmask)x), vandnotm(mask, (vmask)y)); - } +static INLINE vint vselc(vmask mask, vint x, vint y) { + return vorm(vandm(mask, (vmask)x), vandnotm(mask, (vmask)y)); +} #endif static INLINE vfloat vselfzero(vmask mask, vfloat x) { - // returns value of x if corresponding mask bits are 1, else returns 0 - // faster than vself(mask, x, ZEROV) + // returns value of x if corresponding mask bits are 1, else returns 0 + // faster than vself(mask, x, ZEROV) return _mm_and_ps((vfloat)mask, x); } static INLINE vfloat vselfnotzero(vmask mask, vfloat x) { @@ -939,8 +939,8 @@ static INLINE vfloat vselfnotzero(vmask mask, vfloat x) { } static INLINE vint vselizero(vmask mask, vint x) { - // returns value of x if corresponding mask bits are 1, else returns 0 - // faster than vselc(mask, x, ZEROV) + // returns value of x if corresponding mask bits are 1, else returns 0 + // faster than vselc(mask, x, ZEROV) return _mm_and_si128(mask, x); } static INLINE vint vselinotzero(vmask mask, vint x) { @@ -950,20 +950,20 @@ static INLINE vint vselinotzero(vmask mask, vint x) { } static INLINE vint2 vseli2_lt(vfloat f0, vfloat f1, vint2 x, vint2 y) { - vint2 m2 = vcast_vi2_vm(vmaskf_lt(f0, f1)); - return vori2(vandi2(m2, x), vandnoti2(m2, y)); + vint2 m2 = vcast_vi2_vm(vmaskf_lt(f0, f1)); + return vori2(vandi2(m2, x), vandnoti2(m2, y)); } static INLINE vmask vsignbitf(vfloat f) { - return vandm((vmask)f, (vmask)vcast_vf_f(-0.0f)); + return vandm((vmask)f, (vmask)vcast_vf_f(-0.0f)); } static INLINE vfloat vmulsignf(vfloat x, vfloat y) { - return (vfloat)vxorm((vmask)x, vsignbitf(y)); + return (vfloat)vxorm((vmask)x, vsignbitf(y)); } static INLINE vfloat vsignf(vfloat f) { - return (vfloat)vorm((vmask)vcast_vf_f(1.0f), vandm((vmask)vcast_vf_f(-0.0f), (vmask)f)); + return (vfloat)vorm((vmask)vcast_vf_f(1.0f), vandm((vmask)vcast_vf_f(-0.0f), (vmask)f)); } static INLINE vmask vmaskf_isinf(vfloat d) { return vmaskf_eq(vabsf(d), vcast_vf_f(INFINITYf)); } @@ -976,396 +976,396 @@ static INLINE vfloat visinf2f(vfloat d, vfloat m) { return (vfloat)vandm(vmaskf_ static INLINE vfloat visinff(vfloat d) { return visinf2f(d, vcast_vf_f(1.0f)); } static INLINE vint2 vilogbp1f(vfloat d) { - vmask m = vmaskf_lt(d, vcast_vf_f(5.421010862427522E-20f)); - d = vself(m, vmulf(vcast_vf_f(1.8446744073709552E19f), d), d); - vint2 q = vandi2(vsrli2(vcast_vi2_vm(vreinterpret_vm_vf(d)), 23), vcast_vi2_i(0xff)); - q = vsubi2(q, vseli2(m, vcast_vi2_i(64 + 0x7e), vcast_vi2_i(0x7e))); - return q; + vmask m = vmaskf_lt(d, vcast_vf_f(5.421010862427522E-20f)); + d = vself(m, vmulf(vcast_vf_f(1.8446744073709552E19f), d), d); + vint2 q = vandi2(vsrli2(vcast_vi2_vm(vreinterpret_vm_vf(d)), 23), vcast_vi2_i(0xff)); + q = vsubi2(q, vseli2(m, vcast_vi2_i(64 + 0x7e), vcast_vi2_i(0x7e))); + return q; } static INLINE vfloat vldexpf(vfloat x, vint2 q) { - vfloat u; - vint2 m = vsrai2(q, 31); - m = vslli2(vsubi2(vsrai2(vaddi2(m, q), 6), m), 4); - q = vsubi2(q, vslli2(m, 2)); - u = vreinterpret_vf_vm(vcast_vm_vi2(vslli2(vaddi2(m, vcast_vi2_i(0x7f)), 23))); - x = vmulf(vmulf(vmulf(vmulf(x, u), u), u), u); - u = vreinterpret_vf_vm(vcast_vm_vi2(vslli2(vaddi2(q, vcast_vi2_i(0x7f)), 23))); - return vmulf(x, u); + vfloat u; + vint2 m = vsrai2(q, 31); + m = vslli2(vsubi2(vsrai2(vaddi2(m, q), 6), m), 4); + q = vsubi2(q, vslli2(m, 2)); + u = vreinterpret_vf_vm(vcast_vm_vi2(vslli2(vaddi2(m, vcast_vi2_i(0x7f)), 23))); + x = vmulf(vmulf(vmulf(vmulf(x, u), u), u), u); + u = vreinterpret_vf_vm(vcast_vm_vi2(vslli2(vaddi2(q, vcast_vi2_i(0x7f)), 23))); + return vmulf(x, u); } static INLINE vfloat xsinf(vfloat d) { - vint2 q; - vfloat u, s; + vint2 q; + vfloat u, s; - q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)rtengine::RT_1_PI))); + q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)rtengine::RT_1_PI))); - u = vcast_vf_vi2(q); - d = vmlaf(u, vcast_vf_f(-PI4_Af*4), d); - d = vmlaf(u, vcast_vf_f(-PI4_Bf*4), d); - d = vmlaf(u, vcast_vf_f(-PI4_Cf*4), d); - d = vmlaf(u, vcast_vf_f(-PI4_Df*4), d); + u = vcast_vf_vi2(q); + d = vmlaf(u, vcast_vf_f(-PI4_Af*4), d); + d = vmlaf(u, vcast_vf_f(-PI4_Bf*4), d); + d = vmlaf(u, vcast_vf_f(-PI4_Cf*4), d); + d = vmlaf(u, vcast_vf_f(-PI4_Df*4), d); - s = vmulf(d, d); + s = vmulf(d, d); - d = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vnegf(d), d); + d = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vnegf(d), d); - u = vcast_vf_f(2.6083159809786593541503e-06f); - u = vmlaf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); - u = vmlaf(u, s, vcast_vf_f(0.00833307858556509017944336f)); - u = vmlaf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmlaf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmlaf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmlaf(u, s, vcast_vf_f(-0.166666597127914428710938f)); - u = vmlaf(s, vmulf(u, d), d); + u = vmlaf(s, vmulf(u, d), d); - return u; + return u; } static INLINE vfloat xcosf(vfloat d) { - vint2 q; - vfloat u, s; + vint2 q; + vfloat u, s; - q = vrint_vi2_vf(vsubf(vmulf(d, vcast_vf_f((float)rtengine::RT_1_PI)), vcast_vf_f(0.5f))); - q = vaddi2(vaddi2(q, q), vcast_vi2_i(1)); + q = vrint_vi2_vf(vsubf(vmulf(d, vcast_vf_f((float)rtengine::RT_1_PI)), vcast_vf_f(0.5f))); + q = vaddi2(vaddi2(q, q), vcast_vi2_i(1)); - u = vcast_vf_vi2(q); - d = vmlaf(u, vcast_vf_f(-PI4_Af*2), d); - d = vmlaf(u, vcast_vf_f(-PI4_Bf*2), d); - d = vmlaf(u, vcast_vf_f(-PI4_Cf*2), d); - d = vmlaf(u, vcast_vf_f(-PI4_Df*2), d); + u = vcast_vf_vi2(q); + d = vmlaf(u, vcast_vf_f(-PI4_Af*2), d); + d = vmlaf(u, vcast_vf_f(-PI4_Bf*2), d); + d = vmlaf(u, vcast_vf_f(-PI4_Cf*2), d); + d = vmlaf(u, vcast_vf_f(-PI4_Df*2), d); - s = vmulf(d, d); + s = vmulf(d, d); - d = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), d, vnegf(d)); + d = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), d, vnegf(d)); - u = vcast_vf_f(2.6083159809786593541503e-06f); - u = vmlaf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); - u = vmlaf(u, s, vcast_vf_f(0.00833307858556509017944336f)); - u = vmlaf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmlaf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmlaf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmlaf(u, s, vcast_vf_f(-0.166666597127914428710938f)); - u = vmlaf(s, vmulf(u, d), d); + u = vmlaf(s, vmulf(u, d), d); - return u; + return u; } static INLINE vfloat2 xsincosf(vfloat d) { - vint2 q; - vmask m; - vfloat u, s, t, rx, ry; - vfloat2 r; + vint2 q; + vmask m; + vfloat u, s, t, rx, ry; + vfloat2 r; - q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)rtengine::RT_2_PI))); + q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)rtengine::RT_2_PI))); - s = d; + s = d; - u = vcast_vf_vi2(q); - s = vmlaf(u, vcast_vf_f(-PI4_Af*2), s); - s = vmlaf(u, vcast_vf_f(-PI4_Bf*2), s); - s = vmlaf(u, vcast_vf_f(-PI4_Cf*2), s); - s = vmlaf(u, vcast_vf_f(-PI4_Df*2), s); + u = vcast_vf_vi2(q); + s = vmlaf(u, vcast_vf_f(-PI4_Af*2), s); + s = vmlaf(u, vcast_vf_f(-PI4_Bf*2), s); + s = vmlaf(u, vcast_vf_f(-PI4_Cf*2), s); + s = vmlaf(u, vcast_vf_f(-PI4_Df*2), s); - t = s; + t = s; - s = vmulf(s, s); + s = vmulf(s, s); - u = vcast_vf_f(-0.000195169282960705459117889f); - u = vmlaf(u, s, vcast_vf_f(0.00833215750753879547119141f)); - u = vmlaf(u, s, vcast_vf_f(-0.166666537523269653320312f)); - u = vmulf(vmulf(u, s), t); + u = vcast_vf_f(-0.000195169282960705459117889f); + u = vmlaf(u, s, vcast_vf_f(0.00833215750753879547119141f)); + u = vmlaf(u, s, vcast_vf_f(-0.166666537523269653320312f)); + u = vmulf(vmulf(u, s), t); - rx = vaddf(t, u); + rx = vaddf(t, u); - u = vcast_vf_f(-2.71811842367242206819355e-07f); - u = vmlaf(u, s, vcast_vf_f(2.47990446951007470488548e-05f)); - u = vmlaf(u, s, vcast_vf_f(-0.00138888787478208541870117f)); - u = vmlaf(u, s, vcast_vf_f(0.0416666641831398010253906f)); - u = vmlaf(u, s, vcast_vf_f(-0.5)); + u = vcast_vf_f(-2.71811842367242206819355e-07f); + u = vmlaf(u, s, vcast_vf_f(2.47990446951007470488548e-05f)); + u = vmlaf(u, s, vcast_vf_f(-0.00138888787478208541870117f)); + u = vmlaf(u, s, vcast_vf_f(0.0416666641831398010253906f)); + u = vmlaf(u, s, vcast_vf_f(-0.5)); - ry = vaddf(vcast_vf_f(1), vmulf(s, u)); + ry = vaddf(vcast_vf_f(1), vmulf(s, u)); - m = vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); - r.x = vself(m, rx, ry); - r.y = vself(m, ry, rx); + m = vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); + r.x = vself(m, rx, ry); + r.y = vself(m, ry, rx); - m = vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); - r.x = vreinterpret_vf_vm(vxorm(vandm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); + m = vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); + r.x = vreinterpret_vf_vm(vxorm(vandm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); - m = vmaski2_eq(vandi2(vaddi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); - r.y = vreinterpret_vf_vm(vxorm(vandm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); + m = vmaski2_eq(vandi2(vaddi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); + r.y = vreinterpret_vf_vm(vxorm(vandm(m, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); - m = vmaskf_isinf(d); - r.x = vself(m, vcast_vf_f(rtengine::RT_NAN), r.x); - r.y = vself(m, vcast_vf_f(rtengine::RT_NAN), r.y); + m = vmaskf_isinf(d); + r.x = vself(m, vcast_vf_f(rtengine::RT_NAN), r.x); + r.y = vself(m, vcast_vf_f(rtengine::RT_NAN), r.y); - return r; + return r; } static INLINE vfloat xtanf(vfloat d) { - vint2 q; - vmask m; - vfloat u, s, x; + vint2 q; + vmask m; + vfloat u, s, x; - q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)(2 * rtengine::RT_1_PI)))); + q = vrint_vi2_vf(vmulf(d, vcast_vf_f((float)(2 * rtengine::RT_1_PI)))); - x = d; + x = d; - u = vcast_vf_vi2(q); - x = vmlaf(u, vcast_vf_f(-PI4_Af*2), x); - x = vmlaf(u, vcast_vf_f(-PI4_Bf*2), x); - x = vmlaf(u, vcast_vf_f(-PI4_Cf*2), x); - x = vmlaf(u, vcast_vf_f(-PI4_Df*2), x); + u = vcast_vf_vi2(q); + x = vmlaf(u, vcast_vf_f(-PI4_Af*2), x); + x = vmlaf(u, vcast_vf_f(-PI4_Bf*2), x); + x = vmlaf(u, vcast_vf_f(-PI4_Cf*2), x); + x = vmlaf(u, vcast_vf_f(-PI4_Df*2), x); - s = vmulf(x, x); + s = vmulf(x, x); - m = vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); - x = vself(m, vnegf(x), x); + m = vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); + x = vself(m, vnegf(x), x); - u = vcast_vf_f(0.00927245803177356719970703f); - u = vmlaf(u, s, vcast_vf_f(0.00331984995864331722259521f)); - u = vmlaf(u, s, vcast_vf_f(0.0242998078465461730957031f)); - u = vmlaf(u, s, vcast_vf_f(0.0534495301544666290283203f)); - u = vmlaf(u, s, vcast_vf_f(0.133383005857467651367188f)); - u = vmlaf(u, s, vcast_vf_f(0.333331853151321411132812f)); + u = vcast_vf_f(0.00927245803177356719970703f); + u = vmlaf(u, s, vcast_vf_f(0.00331984995864331722259521f)); + u = vmlaf(u, s, vcast_vf_f(0.0242998078465461730957031f)); + u = vmlaf(u, s, vcast_vf_f(0.0534495301544666290283203f)); + u = vmlaf(u, s, vcast_vf_f(0.133383005857467651367188f)); + u = vmlaf(u, s, vcast_vf_f(0.333331853151321411132812f)); - u = vmlaf(s, vmulf(u, x), x); + u = vmlaf(s, vmulf(u, x), x); - u = vself(m, vrecf(u), u); + u = vself(m, vrecf(u), u); - u = vself(vmaskf_isinf(d), vcast_vf_f(NANf), u); + u = vself(vmaskf_isinf(d), vcast_vf_f(NANf), u); - return u; + return u; } static INLINE vfloat xatanf(vfloat s) { - vfloat t, u; - vint2 q; + vfloat t, u; + vint2 q; - q = vseli2_lt(s, vcast_vf_f(0.0f), vcast_vi2_i(2), vcast_vi2_i(0)); - s = vabsf(s); + q = vseli2_lt(s, vcast_vf_f(0.0f), vcast_vi2_i(2), vcast_vi2_i(0)); + s = vabsf(s); - q = vseli2_lt(vcast_vf_f(1.0f), s, vaddi2(q, vcast_vi2_i(1)), q); - s = vself(vmaskf_lt(vcast_vf_f(1.0f), s), vdivf(vcast_vf_f(1.0f), s), s); + q = vseli2_lt(vcast_vf_f(1.0f), s, vaddi2(q, vcast_vi2_i(1)), q); + s = vself(vmaskf_lt(vcast_vf_f(1.0f), s), vdivf(vcast_vf_f(1.0f), s), s); - t = vmulf(s, s); + t = vmulf(s, s); - u = vcast_vf_f(0.00282363896258175373077393f); - u = vmlaf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); - u = vmlaf(u, t, vcast_vf_f(0.0425049886107444763183594f)); - u = vmlaf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); - u = vmlaf(u, t, vcast_vf_f(0.106347933411598205566406f)); - u = vmlaf(u, t, vcast_vf_f(-0.142027363181114196777344f)); - u = vmlaf(u, t, vcast_vf_f(0.199926957488059997558594f)); - u = vmlaf(u, t, vcast_vf_f(-0.333331018686294555664062f)); + u = vcast_vf_f(0.00282363896258175373077393f); + u = vmlaf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); + u = vmlaf(u, t, vcast_vf_f(0.0425049886107444763183594f)); + u = vmlaf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); + u = vmlaf(u, t, vcast_vf_f(0.106347933411598205566406f)); + u = vmlaf(u, t, vcast_vf_f(-0.142027363181114196777344f)); + u = vmlaf(u, t, vcast_vf_f(0.199926957488059997558594f)); + u = vmlaf(u, t, vcast_vf_f(-0.333331018686294555664062f)); - t = vaddf(s, vmulf(s, vmulf(t, u))); + t = vaddf(s, vmulf(s, vmulf(t, u))); - t = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), t), t); - t = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vnegf(t), t); + t = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), t), t); + t = vself(vmaski2_eq(vandi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vnegf(t), t); - return t; + return t; } static INLINE vfloat atan2kf(vfloat y, vfloat x) { - vfloat s, t, u; - vint2 q; - vmask p; + vfloat s, t, u; + vint2 q; + vmask p; - q = vseli2_lt(x, vcast_vf_f(0.0f), vcast_vi2_i(-2), vcast_vi2_i(0)); - x = vabsf(x); + q = vseli2_lt(x, vcast_vf_f(0.0f), vcast_vi2_i(-2), vcast_vi2_i(0)); + x = vabsf(x); - q = vseli2_lt(x, y, vaddi2(q, vcast_vi2_i(1)), q); - p = vmaskf_lt(x, y); - s = vself(p, vnegf(x), y); - t = vmaxf(x, y); + q = vseli2_lt(x, y, vaddi2(q, vcast_vi2_i(1)), q); + p = vmaskf_lt(x, y); + s = vself(p, vnegf(x), y); + t = vmaxf(x, y); - s = vdivf(s, t); - t = vmulf(s, s); + s = vdivf(s, t); + t = vmulf(s, s); - u = vcast_vf_f(0.00282363896258175373077393f); - u = vmlaf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); - u = vmlaf(u, t, vcast_vf_f(0.0425049886107444763183594f)); - u = vmlaf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); - u = vmlaf(u, t, vcast_vf_f(0.106347933411598205566406f)); - u = vmlaf(u, t, vcast_vf_f(-0.142027363181114196777344f)); - u = vmlaf(u, t, vcast_vf_f(0.199926957488059997558594f)); - u = vmlaf(u, t, vcast_vf_f(-0.333331018686294555664062f)); + u = vcast_vf_f(0.00282363896258175373077393f); + u = vmlaf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); + u = vmlaf(u, t, vcast_vf_f(0.0425049886107444763183594f)); + u = vmlaf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); + u = vmlaf(u, t, vcast_vf_f(0.106347933411598205566406f)); + u = vmlaf(u, t, vcast_vf_f(-0.142027363181114196777344f)); + u = vmlaf(u, t, vcast_vf_f(0.199926957488059997558594f)); + u = vmlaf(u, t, vcast_vf_f(-0.333331018686294555664062f)); - t = vaddf(s, vmulf(s, vmulf(t, u))); - t = vaddf(t, vmulf(vcast_vf_vi2(q), vcast_vf_f((float)(rtengine::RT_PI/2)))); + t = vaddf(s, vmulf(s, vmulf(t, u))); + t = vaddf(t, vmulf(vcast_vf_vi2(q), vcast_vf_f((float)(rtengine::RT_PI/2)))); - return t; + return t; } static INLINE vfloat xatan2f(vfloat y, vfloat x) { - vfloat r = atan2kf(vabsf(y), x); + vfloat r = atan2kf(vabsf(y), x); - r = vmulsignf(r, x); - r = vself(vorm(vmaskf_isinf(x), vmaskf_eq(x, vcast_vf_f(0.0f))), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(rtengine::RT_PI/2)), x))), r); - r = vself(vmaskf_isinf(y), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(rtengine::RT_PI/4)), x))), r); - r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vselfzero(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)rtengine::RT_PI)), r); + r = vmulsignf(r, x); + r = vself(vorm(vmaskf_isinf(x), vmaskf_eq(x, vcast_vf_f(0.0f))), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(rtengine::RT_PI/2)), x))), r); + r = vself(vmaskf_isinf(y), vsubf(vcast_vf_f((float)(rtengine::RT_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(rtengine::RT_PI/4)), x))), r); + r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vselfzero(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)rtengine::RT_PI)), r); - return vself(vmaskf_isnan(x, y), vcast_vf_f(NANf), vmulsignf(r, y)); + return vself(vmaskf_isnan(x, y), vcast_vf_f(NANf), vmulsignf(r, y)); } static INLINE vfloat xasinf(vfloat d) { - vfloat x, y; - x = vaddf(vcast_vf_f(1.0f), d); - y = vsubf(vcast_vf_f(1.0f), d); - x = vmulf(x, y); - x = vsqrtf(x); - x = vself(vmaskf_isnan(x), vcast_vf_f(NANf), atan2kf(vabsf(d), x)); - return vmulsignf(x, d); + vfloat x, y; + x = vaddf(vcast_vf_f(1.0f), d); + y = vsubf(vcast_vf_f(1.0f), d); + x = vmulf(x, y); + x = vsqrtf(x); + x = vself(vmaskf_isnan(x), vcast_vf_f(NANf), atan2kf(vabsf(d), x)); + return vmulsignf(x, d); } static INLINE vfloat xacosf(vfloat d) { - vfloat x, y; - x = vaddf(vcast_vf_f(1.0f), d); - y = vsubf(vcast_vf_f(1.0f), d); - x = vmulf(x, y); - x = vsqrtf(x); - x = vmulsignf(atan2kf(x, vabsf(d)), d); - y = (vfloat)vandm(vmaskf_lt(d, vcast_vf_f(0.0f)), (vmask)vcast_vf_f((float)rtengine::RT_PI)); - x = vaddf(x, y); - return x; + vfloat x, y; + x = vaddf(vcast_vf_f(1.0f), d); + y = vsubf(vcast_vf_f(1.0f), d); + x = vmulf(x, y); + x = vsqrtf(x); + x = vmulsignf(atan2kf(x, vabsf(d)), d); + y = (vfloat)vandm(vmaskf_lt(d, vcast_vf_f(0.0f)), (vmask)vcast_vf_f((float)rtengine::RT_PI)); + x = vaddf(x, y); + return x; } static INLINE vfloat xlogf(vfloat d) { - vfloat x, x2, t, m; - vint2 e; + vfloat x, x2, t, m; + vint2 e; - e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); - m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); + m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); - x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); - x2 = vmulf(x, x); + x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); + x2 = vmulf(x, x); - t = vcast_vf_f(0.2371599674224853515625f); - t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); - t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); - t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); - t = vmlaf(t, x2, vcast_vf_f(2.0f)); + t = vcast_vf_f(0.2371599674224853515625f); + t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); + t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); + t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); + t = vmlaf(t, x2, vcast_vf_f(2.0f)); - x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); + x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); - x = vself(vmaskf_ispinf(d), vcast_vf_f(INFINITYf), x); - x = vself(vmaskf_gt(vcast_vf_f(0), d), vcast_vf_f(NANf), x); - x = vself(vmaskf_eq(d, vcast_vf_f(0)), vcast_vf_f(-INFINITYf), x); + x = vself(vmaskf_ispinf(d), vcast_vf_f(INFINITYf), x); + x = vself(vmaskf_gt(vcast_vf_f(0), d), vcast_vf_f(NANf), x); + x = vself(vmaskf_eq(d, vcast_vf_f(0)), vcast_vf_f(-INFINITYf), x); - return x; + return x; } static INLINE vfloat xlogf0(vfloat d) { - vfloat x, x2, t, m; - vint2 e; + vfloat x, x2, t, m; + vint2 e; - e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); - m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); + m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); - x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); - x2 = vmulf(x, x); + x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); + x2 = vmulf(x, x); - t = vcast_vf_f(0.2371599674224853515625f); - t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); - t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); - t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); - t = vmlaf(t, x2, vcast_vf_f(2.0f)); + t = vcast_vf_f(0.2371599674224853515625f); + t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); + t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); + t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); + t = vmlaf(t, x2, vcast_vf_f(2.0f)); - x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); + x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); - x = vself(vmaskf_ispinf(d), vcast_vf_f(0), x); - x = vself(vmaskf_gt(vcast_vf_f(0), d), vcast_vf_f(0), x); - x = vself(vmaskf_eq(d, vcast_vf_f(0)), vcast_vf_f(0), x); + x = vself(vmaskf_ispinf(d), vcast_vf_f(0), x); + x = vself(vmaskf_gt(vcast_vf_f(0), d), vcast_vf_f(0), x); + x = vself(vmaskf_eq(d, vcast_vf_f(0)), vcast_vf_f(0), x); - return x; + return x; } static INLINE vfloat xlogfNoCheck(vfloat d) { // this version does not check input values. Use it only when you know the input values are > 0 e.g. when filling a lookup table - vfloat x, x2, t, m; - vint2 e; + vfloat x, x2, t, m; + vint2 e; - e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); - m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); + m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); - x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); - x2 = vmulf(x, x); + x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); + x2 = vmulf(x, x); - t = vcast_vf_f(0.2371599674224853515625f); - t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); - t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); - t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); - t = vmlaf(t, x2, vcast_vf_f(2.0f)); + t = vcast_vf_f(0.2371599674224853515625f); + t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); + t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); + t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); + t = vmlaf(t, x2, vcast_vf_f(2.0f)); - return vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); + return vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); } static INLINE vfloat xexpf(vfloat d) { - vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); - vfloat s, u; + vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); + vfloat s, u; - s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf),d); - s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf),s); + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf),d); + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf),s); - u = vcast_vf_f(0.00136324646882712841033936f); - u = vmlaf(u, s, vcast_vf_f(0.00836596917361021041870117f)); - u = vmlaf(u, s, vcast_vf_f(0.0416710823774337768554688f)); - u = vmlaf(u, s, vcast_vf_f(0.166665524244308471679688f)); - u = vmlaf(u, s, vcast_vf_f(0.499999850988388061523438f)); + u = vcast_vf_f(0.00136324646882712841033936f); + u = vmlaf(u, s, vcast_vf_f(0.00836596917361021041870117f)); + u = vmlaf(u, s, vcast_vf_f(0.0416710823774337768554688f)); + u = vmlaf(u, s, vcast_vf_f(0.166665524244308471679688f)); + u = vmlaf(u, s, vcast_vf_f(0.499999850988388061523438f)); - u = vaddf(vcast_vf_f(1.0f), vmlaf(vmulf(s, s), u, s)); + u = vaddf(vcast_vf_f(1.0f), vmlaf(vmulf(s, s), u, s)); - u = vldexpf(u, q); + u = vldexpf(u, q); - // -104.0 - return vselfnotzero(vmaskf_gt(vcast_vf_f(-104.f), d), u); + // -104.0 + return vselfnotzero(vmaskf_gt(vcast_vf_f(-104.f), d), u); } static INLINE vfloat xexpfNoCheck(vfloat d) { // this version does not check input values. Use it only when you know the input values are > -104.f e.g. when filling a lookup table - vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); - vfloat s, u; + vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); + vfloat s, u; - s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf),d); - s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf),s); + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf),d); + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf),s); - u = vcast_vf_f(0.00136324646882712841033936f); - u = vmlaf(u, s, vcast_vf_f(0.00836596917361021041870117f)); - u = vmlaf(u, s, vcast_vf_f(0.0416710823774337768554688f)); - u = vmlaf(u, s, vcast_vf_f(0.166665524244308471679688f)); - u = vmlaf(u, s, vcast_vf_f(0.499999850988388061523438f)); + u = vcast_vf_f(0.00136324646882712841033936f); + u = vmlaf(u, s, vcast_vf_f(0.00836596917361021041870117f)); + u = vmlaf(u, s, vcast_vf_f(0.0416710823774337768554688f)); + u = vmlaf(u, s, vcast_vf_f(0.166665524244308471679688f)); + u = vmlaf(u, s, vcast_vf_f(0.499999850988388061523438f)); - u = vaddf(vcast_vf_f(1.0f), vmlaf(vmulf(s, s), u, s)); + u = vaddf(vcast_vf_f(1.0f), vmlaf(vmulf(s, s), u, s)); - return vldexpf(u, q); + return vldexpf(u, q); } static INLINE vfloat xcbrtf(vfloat d) { - vfloat x, y, q = vcast_vf_f(1.0), t; - vint2 e, qu, re; + vfloat x, y, q = vcast_vf_f(1.0), t; + vint2 e, qu, re; - e = vilogbp1f(vabsf(d)); - d = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + e = vilogbp1f(vabsf(d)); + d = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); - t = vaddf(vcast_vf_vi2(e), vcast_vf_f(6144)); - qu = vtruncate_vi2_vf(vdivf(t, vcast_vf_f(3))); - re = vtruncate_vi2_vf(vsubf(t, vmulf(vcast_vf_vi2(qu), vcast_vf_f(3)))); + t = vaddf(vcast_vf_vi2(e), vcast_vf_f(6144)); + qu = vtruncate_vi2_vf(vdivf(t, vcast_vf_f(3))); + re = vtruncate_vi2_vf(vsubf(t, vmulf(vcast_vf_vi2(qu), vcast_vf_f(3)))); - q = vself(vmaski2_eq(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q); - q = vself(vmaski2_eq(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q); - q = vldexpf(q, vsubi2(qu, vcast_vi2_i(2048))); + q = vself(vmaski2_eq(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q); + q = vself(vmaski2_eq(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q); + q = vldexpf(q, vsubi2(qu, vcast_vi2_i(2048))); - q = vmulsignf(q, d); - d = vabsf(d); + q = vmulsignf(q, d); + d = vabsf(d); - x = vcast_vf_f(-0.601564466953277587890625f); - x = vmlaf(x, d, vcast_vf_f(2.8208892345428466796875f)); - x = vmlaf(x, d, vcast_vf_f(-5.532182216644287109375f)); - x = vmlaf(x, d, vcast_vf_f(5.898262500762939453125f)); - x = vmlaf(x, d, vcast_vf_f(-3.8095417022705078125f)); - x = vmlaf(x, d, vcast_vf_f(2.2241256237030029296875f)); + x = vcast_vf_f(-0.601564466953277587890625f); + x = vmlaf(x, d, vcast_vf_f(2.8208892345428466796875f)); + x = vmlaf(x, d, vcast_vf_f(-5.532182216644287109375f)); + x = vmlaf(x, d, vcast_vf_f(5.898262500762939453125f)); + x = vmlaf(x, d, vcast_vf_f(-3.8095417022705078125f)); + x = vmlaf(x, d, vcast_vf_f(2.2241256237030029296875f)); - y = vmulf(vmulf(d, x), x); - y = vmulf(vsubf(y, vmulf(vmulf(vcast_vf_f(2.0f / 3.0f), y), vmlaf(y, x, vcast_vf_f(-1.0f)))), q); + y = vmulf(vmulf(d, x), x); + y = vmulf(vsubf(y, vmulf(vmulf(vcast_vf_f(2.0f / 3.0f), y), vmlaf(y, x, vcast_vf_f(-1.0f)))), q); - return y; + return y; } static INLINE vfloat vclampf(vfloat value, vfloat low, vfloat high) { @@ -1374,7 +1374,7 @@ static INLINE vfloat vclampf(vfloat value, vfloat low, vfloat high) { } static INLINE vfloat SQRV(vfloat a){ - return a * a; + return a * a; } static inline void vswap( vmask condition, vfloat &a, vfloat &b) { @@ -1404,7 +1404,7 @@ static inline float vhmax(vfloat a) { static INLINE vfloat vmul2f(vfloat a){ // fastest way to multiply by 2 - return a + a; + return a + a; } static INLINE vfloat vintpf(vfloat a, vfloat b, vfloat c) { diff --git a/rtexif/CMakeLists.txt b/rtexif/CMakeLists.txt index 0a38a3a35..5a3831455 100644 --- a/rtexif/CMakeLists.txt +++ b/rtexif/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(rtexif STATIC olympusattribs.cc panasonicattribs.cc pentaxattribs.cc - rtexif.cc + rtexif.cc sonyminoltaattribs.cc stdattribs.cc ) diff --git a/rtgui/coloredbar.cc b/rtgui/coloredbar.cc index ca9a381cf..adafcbdde 100644 --- a/rtgui/coloredbar.cc +++ b/rtgui/coloredbar.cc @@ -142,7 +142,7 @@ void ColoredBar::updateBackBuffer(Gtk::DrawingArea &drawingArea) case (RTO_Left2Right): for (int py = 0; py < h; ++py) { for (int px = 0; px < w; ++px) { - unsigned char *pixel = surfaceData + (py * w + px) * 4; + unsigned char *pixel = surfaceData + (py * w + px) * 4; double x_ = double( px); //double y_ = double((h-1)-py); unused double x01 = x_ / double(w - 1); @@ -158,7 +158,7 @@ void ColoredBar::updateBackBuffer(Gtk::DrawingArea &drawingArea) case (RTO_Right2Left): for (int py = 0; py < h; ++py) { for (int px = 0; px < w; ++px) { - unsigned char *pixel = surfaceData + (py * w + px) * 4; + unsigned char *pixel = surfaceData + (py * w + px) * 4; //double x_ = double((w-1)-px); unused //double y_ = double((h-1)-py); unused double x01 = double(px) / double(w - 1); @@ -174,7 +174,7 @@ void ColoredBar::updateBackBuffer(Gtk::DrawingArea &drawingArea) case (RTO_Bottom2Top): for (int py = 0; py < h; ++py) { for (int px = 0; px < w; ++px) { - unsigned char *pixel = surfaceData + (py * w + px) * 4; + unsigned char *pixel = surfaceData + (py * w + px) * 4; //double x_ = double((w-1)-px); unused //double y_ = double((h-1)-py); unused double x01 = double(px) / double(w - 1); @@ -191,7 +191,7 @@ void ColoredBar::updateBackBuffer(Gtk::DrawingArea &drawingArea) default: for (int py = 0; py < h; ++py) { for (int px = 0; px < w; ++px) { - unsigned char *pixel = surfaceData + (py * w + px) * 4; + unsigned char *pixel = surfaceData + (py * w + px) * 4; double x_ = double( px); double y_ = double( py); double x01 = x_ / double(w - 1); diff --git a/rtgui/main.cc b/rtgui/main.cc index 098963e0e..631d3db0a 100644 --- a/rtgui/main.cc +++ b/rtgui/main.cc @@ -529,13 +529,13 @@ int main (int argc, char **argv) int ret = 0; if (options.pseudoHiDPISupport) { - // Reading/updating GDK_SCALE early if it exists - const gchar *gscale = g_getenv("GDK_SCALE"); - if (gscale && gscale[0] == '2') { - initialGdkScale = 2; - } - // HOMBRE: On Windows, if resolution is set to 200%, Gtk internal variables are SCALE=2 and DPI=96 - g_setenv("GDK_SCALE", "1", true); + // Reading/updating GDK_SCALE early if it exists + const gchar *gscale = g_getenv("GDK_SCALE"); + if (gscale && gscale[0] == '2') { + initialGdkScale = 2; + } + // HOMBRE: On Windows, if resolution is set to 200%, Gtk internal variables are SCALE=2 and DPI=96 + g_setenv("GDK_SCALE", "1", true); } gdk_threads_set_lock_functions (G_CALLBACK (myGdkLockEnter), (G_CALLBACK (myGdkLockLeave))); diff --git a/rtgui/options.cc b/rtgui/options.cc index 20e20620a..737607bcf 100644 --- a/rtgui/options.cc +++ b/rtgui/options.cc @@ -1258,7 +1258,7 @@ void Options::readFromFile(Glib::ustring fname) } if (keyFile.has_key("GUI", "PseudoHiDPISupport")) { - pseudoHiDPISupport = keyFile.get_boolean("GUI", "PseudoHiDPISupport"); + pseudoHiDPISupport = keyFile.get_boolean("GUI", "PseudoHiDPISupport"); } if (keyFile.has_key("GUI", "LastPreviewScale")) { diff --git a/rtgui/rtscalable.cc b/rtgui/rtscalable.cc index 15211a7ee..b37f2276f 100644 --- a/rtgui/rtscalable.cc +++ b/rtgui/rtscalable.cc @@ -37,9 +37,9 @@ Gtk::TextDirection RTScalable::direction = Gtk::TextDirection::TEXT_DIR_NONE; void RTScalable::setDPInScale (const double newDPI, const int newScale) { if (!options.pseudoHiDPISupport) { - scale = 1; - dpi = baseDPI; - return; + scale = 1; + dpi = baseDPI; + return; } if (scale != newScale || (scale == 1 && dpi != newDPI)) { diff --git a/rtgui/rtwindow.cc b/rtgui/rtwindow.cc index 3983f93b1..fc9be32f7 100644 --- a/rtgui/rtwindow.cc +++ b/rtgui/rtwindow.cc @@ -168,7 +168,7 @@ RTWindow::RTWindow () #endif //GTK318 if (options.pseudoHiDPISupport) { - fontScale = options.fontSize / (float)RTScalable::baseFontSize; + fontScale = options.fontSize / (float)RTScalable::baseFontSize; } if (options.rtSettings.verbose) { printf("\"Non-Default\" font size(%d) * scale(%d) / fontScale(%.3f)\n", options.fontSize, (int)initialGdkScale, fontScale); @@ -196,7 +196,7 @@ RTWindow::RTWindow () pt = fontSize / Pango::SCALE; } if (options.pseudoHiDPISupport) { - fontScale = (float)pt / (float)RTScalable::baseFontSize; + fontScale = (float)pt / (float)RTScalable::baseFontSize; } if ((int)initialGdkScale > 1 || pt != RTScalable::baseFontSize) { css = Glib::ustring::compose ("* { font-size: %1pt}", pt * (int)initialGdkScale); diff --git a/tools/RTProfileBuilderSample.cs b/tools/RTProfileBuilderSample.cs index a097e6883..80474b63b 100644 --- a/tools/RTProfileBuilderSample.cs +++ b/tools/RTProfileBuilderSample.cs @@ -1,293 +1,293 @@ -#region Usings -using System; -using System.Text; -using System.IO; -using System.Globalization; -using System.Diagnostics; -using System.Configuration; -using System.Collections; -using System.Collections.Specialized; -#endregion - -// *** Raw Therapee sample Custom Profile builder (version 2013-08-12) *** -// -// -// WARNING: The command line parameters has changed since this file has been created by Oduis. The new mechanism involves a -// temporary communication file (.ini style) to provide system parameters and metadata read by RawTherapee. This script has -// to be updated by some C# developer in order to work. -// -// -// WARNING: PP3 format may change in the future versions! If this happens there will probably be no automatic migration path, -// you'll have to adjust on your own. This is a sample, and therefore not supported by the RT team (just by oduis) -// -// -// How to use: -// 1. Modify the GetCorrectedSettings function below according to your needs. -// 2. Download and install Microsoft .Net Runtime (latest version is 4.0 as of writing), if it's not already on your machine. -// You can get it for free via Windows Update or from microsoft.com. No need for Visual Studio etc. -// 3. Open a command line and compile this CS-File using the C# 32bit compiler. It is usually installed somewhere here: -// C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc.exe -// Call csc.exe (C#-Compiler) with your .CS file as parameter like this (one big line): -// -// C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc -// /r:C:\Windows\Microsoft.NET\Framework\v4.0.30319\System.dll -// /r:C:\Windows\Microsoft.NET\Framework\v4.0.30319\System.Configuration.dll -// RTProfileBuilderSample.cs -// -// (On most machines it already works with "C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc RTProfileBuilderSample.cs") -// CSC will compile it and emit an EXE. -// 4. Open your RT options files and find the entry [Profiles]/CustomProfileBuilder -// 5. Enter the path to your newly built exe here. On Windows, don't forget double slashes (e.g. "C:\\MyDir\\Mybuilder.exe") -// And you're done! The EXE is only called on opening the image editor and there is no PP3 already -// -// If you want to use EXIFTOOL to gather more details information to build queries: -// 1. Download exiftool.exe from http://www.sno.phy.queensu.ca/~phil/exiftool/ -// 2. Rename it to exiftool.exe (NOT exiftool(-k).. or something!) -// 3. Copy the RTProfilerBuilder.exe.config next to your own EXE. If you renamed it, rename config to "(Yourname).exe.config" -// 4. Open the config with notepad (it's an XML file). Set ExifToolPath to your downloaded and renamed exe -// -// If you want to know what parameters are available, call "exiftool.exe -tab -short" -// -// This description is for Windows. The C# code does not use anything fancy, will probably work with MONO on Linux/OSX, too - -namespace RTProfilerBuilder { - /// Main class. Mostly change GetCorrectedSettings. - class RTProfileBuilder { - - /// Adds the Nikkor zoom distortion correction profile. - /// First array is list of focal lengths, second array is the RT setting that should correct the - /// distortion for the corresponding focal length. Values between these values are automatically interpolated. - /// The focal length values must already be ordered. The number of sample points is not limited. - static DistortionCorrectProf distNikkor24120f4 = new DistortionCorrectProf( - new double[] { 24, 28, 35, 50, 70, 85, 120 }, - new double[] { -0.1, -0.063, -0.012, 0.018, 0.034, 0.04, 0.048 } - ); - - - /// This is your personalisation function - /// Full EXIF from EXIFTOOL (if configured). - /// Entry, like "Sharpening/Radius" - /// Current value (from default file) - /// FNumberExposure in seconds - /// Focal length in MMISO value - /// Lens from EXIFCamera from EXIF - /// The value to be written. Simply take the current value if you have nothing to touch. - static string GetCorrectedSetting(NameValueCollection exif, string sectionEntry, string value, - double fNumber, double exposureSecs, double focalLength, long iso, string lens, string camera) { - - string s; - - // We don't do anything to the value if it's not our camera - if (camera.EndsWith("NIKON D700", StringComparison.InvariantCultureIgnoreCase) && lens.Contains("24.0-120.0 mm f/4.0")) { - switch (sectionEntry) { - // Here is the place to adjust your settings - // Pretty simple: "SectionName/EntryName" in options file - - case "Vignetting Correction/Amount": - value = (fNumber < 8 && focalLength < 30) ? "30" : "0"; - break; - - case "RAW/CA": - value = ToBool(fNumber < 11); // Means "Enabled if fnumber<11, otherwise disabled" - break; - - case "Impulse Denoising/Enabled": - value = ToBool(iso >= 3200); - break; - - case "HLRecovery/Enabled": - value = ToBool(iso >= 1600); // Dynamic range decreases, so we'll probably need it - break; - - case "Color Boost/Amount": - if (iso >= 6400) value = "0"; // Colors will get poppy anyway... - break; - - case "Distortion/Amount": - // we already checked in the IF upstairs that this is "our" lens - value = distNikkor24120f4.GetDistortionAmount(focalLength); - break; - - // Add other parameters here. Mention this is case sensitive! - - default: break; // we don't touch values we don't care about - } - } // end if camera=xxx - - - // This is for camera independent settings - switch (sectionEntry) { - // These are parsed from EXIFTOOL and XMP in DNG (see http://en.wikipedia.org/wiki/Extensible_Metadata_Platform) - case "IPTC/City": - s = exif.Get("City"); - if (!String.IsNullOrEmpty(s)) value = s; - break; - - case "IPTC/Country": - s = exif.Get("Country"); - if (!String.IsNullOrEmpty(s)) value = s; - break; - - case "IPTC/Caption": - case "IPTC/Title": - s = exif.Get("Headline"); - if (!String.IsNullOrEmpty(s)) value = s; - break; - - // Add other parameters here. Mention this is case sensitive! - - default: break; // we don't touch values we don't care about - } - return value; - } - - #region * Main and Helpers - static string ToBool(bool condition) { return condition ? "true" : "false"; } - static string ToFloat(float f) { return f.ToString(CultureInfo.InvariantCulture); } - - /// Reads default file and parses it. No need to touch it for your personal settings. - /// Command line args - /// 0 on all OK. - static int Main(string[] args) { - int exitCode = 0; - - try { - #region Parse input parameters - int argNo = 0; - - // Name of raw/JPG to process - string sourceFile = args[argNo++]; - - // What the user selected as his base profile - string defaultProfileFilePath = args[argNo++]; - - // Cache directory, for any logging file - string cachePath = args[argNo++]; - - - // True if the image is only being flagged as inTrash, rank or colorLabel but still need valid PP3 - actually not used by this script - bool forFlaggingPurpose = bool.Parse(args[argNo++], CultureInfo.InvariantCulture); - - // Note that old C++ has no automatic number globalization - double fNumber = double.Parse(args[argNo++], CultureInfo.InvariantCulture); - double exposureSecs = double.Parse(args[argNo++], CultureInfo.InvariantCulture); - double focalLength = double.Parse(args[argNo++], CultureInfo.InvariantCulture); - long iso = long.Parse(args[argNo++], CultureInfo.InvariantCulture); - - string lens = args[argNo++]; - string cameraMake = args[argNo++]; - string cameraModel = args[argNo++]; - string camera = cameraMake + " " + cameraModel; - #endregion - - // Read default file as basis - string[] lines = File.ReadAllLines(defaultProfileFilePath); - - NameValueCollection nvEXIF = ParseFullExifData(sourceFile); - - // File should be Windows ANSI - using (TextWriter tw = new StreamWriter(sourceFile + ".pp3", false, new UTF8Encoding(false))) { - string section = ""; - - foreach (string line in lines) { - string l = line.Trim(); - if (!String.IsNullOrEmpty(line)) { - - if (l.StartsWith("[")) - section = l.Trim(new char[] { '[', ']' }); - else if (char.IsLetterOrDigit(l[0]) && l.Contains("=")) { - int valPos = l.IndexOf("=") + 1; - - string newValue = GetCorrectedSetting(nvEXIF, section + "/" + l.Substring(0, valPos - 1), l.Substring(valPos).Trim(), - fNumber, exposureSecs, focalLength, iso, lens, camera); - - // Merge in new value - l = l.Substring(0, valPos) + (newValue ?? ""); - } - } - - tw.WriteLine(l); - } - } - - } catch (Exception ex) { - Console.WriteLine("Error: " + ex.ToString()); // can be seen in the RT console window - - exitCode = 1; - } - - return exitCode; - } - - - static NameValueCollection ParseFullExifData(string filePath) { - NameValueCollection nv = new NameValueCollection(); - - string exifToolPath = ConfigurationManager.AppSettings["ExifToolPath"]; - if (!String.IsNullOrEmpty(exifToolPath)) { - ProcessStartInfo psi = new ProcessStartInfo(exifToolPath, "\"" + filePath + "\" -tab -short"); - psi.CreateNoWindow = false; - psi.UseShellExecute = false; - psi.StandardOutputEncoding = System.Text.Encoding.UTF8; - psi.RedirectStandardOutput = true; - - Process p = Process.Start(psi); - - using (StreamReader sr = p.StandardOutput) { - while (!sr.EndOfStream) { - string line = sr.ReadLine(); - if (line.Contains("\t")) { - string[] split = line.Split('\t'); - nv.Add(split[0], split[1]); - } - } - } - - p.WaitForExit(); - } - - return nv; - } - - #endregion - } - - #region DistortionCorrectProf - /// Holds a distortion correction profile for one lens. Uses sample points (focal length vs. dist. correction) as input. - class DistortionCorrectProf { - double[] adFocLen, adCorrect; - - /// Parses array to internal structure - /// Focal lengths - /// Correction factors - public DistortionCorrectProf(double[] focLen, double[] correct) { - if (focLen == null || correct == null || focLen.Length != correct.Length || focLen.Length < 2) - throw new Exception("DistortionCorrectProf inputs must be valid and of the same lengths, at least 2 points"); - - adFocLen = focLen; adCorrect = correct; - - for (int i = 0; i < adFocLen.Length - 1; i++) - if (adFocLen[i] >= adFocLen[i + 1]) throw new Exception("The distortion correction focal length points must be ordered!"); - } - - /// Calculates regression value of RT distortion amount for the given focal length. - /// Input focal length. - /// Distortion in RT format. - public string GetDistortionAmount(double focalLength) { - // if it's out of area (which should just happen with e.g. rounding errors), return flat defaults. - if (focalLength <= adFocLen[0]) return adCorrect[0].ToString("G", CultureInfo.InvariantCulture); - if (focalLength >= adFocLen[adFocLen.Length - 1]) return adCorrect[adFocLen.Length - 1].ToString("G", CultureInfo.InvariantCulture); - - for (int i = 0; i < adFocLen.Length - 1; i++) { - if (focalLength >= adFocLen[i] && focalLength < adFocLen[i + 1]) { - // from the sample curves taken so far, it it safe to take a simple linear interpolation here - double corr = adCorrect[i] + (adCorrect[i + 1] - adCorrect[i]) * (focalLength - adFocLen[i]) / (adFocLen[i + 1] - adFocLen[i]); - return corr.ToString("G3", CultureInfo.InvariantCulture); - } - } - - return ""; // should never happen - } - } - #endregion -} +#region Usings +using System; +using System.Text; +using System.IO; +using System.Globalization; +using System.Diagnostics; +using System.Configuration; +using System.Collections; +using System.Collections.Specialized; +#endregion + +// *** Raw Therapee sample Custom Profile builder (version 2013-08-12) *** +// +// +// WARNING: The command line parameters has changed since this file has been created by Oduis. The new mechanism involves a +// temporary communication file (.ini style) to provide system parameters and metadata read by RawTherapee. This script has +// to be updated by some C# developer in order to work. +// +// +// WARNING: PP3 format may change in the future versions! If this happens there will probably be no automatic migration path, +// you'll have to adjust on your own. This is a sample, and therefore not supported by the RT team (just by oduis) +// +// +// How to use: +// 1. Modify the GetCorrectedSettings function below according to your needs. +// 2. Download and install Microsoft .Net Runtime (latest version is 4.0 as of writing), if it's not already on your machine. +// You can get it for free via Windows Update or from microsoft.com. No need for Visual Studio etc. +// 3. Open a command line and compile this CS-File using the C# 32bit compiler. It is usually installed somewhere here: +// C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc.exe +// Call csc.exe (C#-Compiler) with your .CS file as parameter like this (one big line): +// +// C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc +// /r:C:\Windows\Microsoft.NET\Framework\v4.0.30319\System.dll +// /r:C:\Windows\Microsoft.NET\Framework\v4.0.30319\System.Configuration.dll +// RTProfileBuilderSample.cs +// +// (On most machines it already works with "C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc RTProfileBuilderSample.cs") +// CSC will compile it and emit an EXE. +// 4. Open your RT options files and find the entry [Profiles]/CustomProfileBuilder +// 5. Enter the path to your newly built exe here. On Windows, don't forget double slashes (e.g. "C:\\MyDir\\Mybuilder.exe") +// And you're done! The EXE is only called on opening the image editor and there is no PP3 already +// +// If you want to use EXIFTOOL to gather more details information to build queries: +// 1. Download exiftool.exe from http://www.sno.phy.queensu.ca/~phil/exiftool/ +// 2. Rename it to exiftool.exe (NOT exiftool(-k).. or something!) +// 3. Copy the RTProfilerBuilder.exe.config next to your own EXE. If you renamed it, rename config to "(Yourname).exe.config" +// 4. Open the config with notepad (it's an XML file). Set ExifToolPath to your downloaded and renamed exe +// +// If you want to know what parameters are available, call "exiftool.exe -tab -short" +// +// This description is for Windows. The C# code does not use anything fancy, will probably work with MONO on Linux/OSX, too + +namespace RTProfilerBuilder { + /// Main class. Mostly change GetCorrectedSettings. + class RTProfileBuilder { + + /// Adds the Nikkor zoom distortion correction profile. + /// First array is list of focal lengths, second array is the RT setting that should correct the + /// distortion for the corresponding focal length. Values between these values are automatically interpolated. + /// The focal length values must already be ordered. The number of sample points is not limited. + static DistortionCorrectProf distNikkor24120f4 = new DistortionCorrectProf( + new double[] { 24, 28, 35, 50, 70, 85, 120 }, + new double[] { -0.1, -0.063, -0.012, 0.018, 0.034, 0.04, 0.048 } + ); + + + /// This is your personalisation function + /// Full EXIF from EXIFTOOL (if configured). + /// Entry, like "Sharpening/Radius" + /// Current value (from default file) + /// FNumberExposure in seconds + /// Focal length in MMISO value + /// Lens from EXIFCamera from EXIF + /// The value to be written. Simply take the current value if you have nothing to touch. + static string GetCorrectedSetting(NameValueCollection exif, string sectionEntry, string value, + double fNumber, double exposureSecs, double focalLength, long iso, string lens, string camera) { + + string s; + + // We don't do anything to the value if it's not our camera + if (camera.EndsWith("NIKON D700", StringComparison.InvariantCultureIgnoreCase) && lens.Contains("24.0-120.0 mm f/4.0")) { + switch (sectionEntry) { + // Here is the place to adjust your settings + // Pretty simple: "SectionName/EntryName" in options file + + case "Vignetting Correction/Amount": + value = (fNumber < 8 && focalLength < 30) ? "30" : "0"; + break; + + case "RAW/CA": + value = ToBool(fNumber < 11); // Means "Enabled if fnumber<11, otherwise disabled" + break; + + case "Impulse Denoising/Enabled": + value = ToBool(iso >= 3200); + break; + + case "HLRecovery/Enabled": + value = ToBool(iso >= 1600); // Dynamic range decreases, so we'll probably need it + break; + + case "Color Boost/Amount": + if (iso >= 6400) value = "0"; // Colors will get poppy anyway... + break; + + case "Distortion/Amount": + // we already checked in the IF upstairs that this is "our" lens + value = distNikkor24120f4.GetDistortionAmount(focalLength); + break; + + // Add other parameters here. Mention this is case sensitive! + + default: break; // we don't touch values we don't care about + } + } // end if camera=xxx + + + // This is for camera independent settings + switch (sectionEntry) { + // These are parsed from EXIFTOOL and XMP in DNG (see http://en.wikipedia.org/wiki/Extensible_Metadata_Platform) + case "IPTC/City": + s = exif.Get("City"); + if (!String.IsNullOrEmpty(s)) value = s; + break; + + case "IPTC/Country": + s = exif.Get("Country"); + if (!String.IsNullOrEmpty(s)) value = s; + break; + + case "IPTC/Caption": + case "IPTC/Title": + s = exif.Get("Headline"); + if (!String.IsNullOrEmpty(s)) value = s; + break; + + // Add other parameters here. Mention this is case sensitive! + + default: break; // we don't touch values we don't care about + } + return value; + } + +#region * Main and Helpers + static string ToBool(bool condition) { return condition ? "true" : "false"; } + static string ToFloat(float f) { return f.ToString(CultureInfo.InvariantCulture); } + + /// Reads default file and parses it. No need to touch it for your personal settings. + /// Command line args + /// 0 on all OK. + static int Main(string[] args) { + int exitCode = 0; + + try { +#region Parse input parameters + int argNo = 0; + + // Name of raw/JPG to process + string sourceFile = args[argNo++]; + + // What the user selected as his base profile + string defaultProfileFilePath = args[argNo++]; + + // Cache directory, for any logging file + string cachePath = args[argNo++]; + + + // True if the image is only being flagged as inTrash, rank or colorLabel but still need valid PP3 - actually not used by this script + bool forFlaggingPurpose = bool.Parse(args[argNo++], CultureInfo.InvariantCulture); + + // Note that old C++ has no automatic number globalization + double fNumber = double.Parse(args[argNo++], CultureInfo.InvariantCulture); + double exposureSecs = double.Parse(args[argNo++], CultureInfo.InvariantCulture); + double focalLength = double.Parse(args[argNo++], CultureInfo.InvariantCulture); + long iso = long.Parse(args[argNo++], CultureInfo.InvariantCulture); + + string lens = args[argNo++]; + string cameraMake = args[argNo++]; + string cameraModel = args[argNo++]; + string camera = cameraMake + " " + cameraModel; +#endregion + + // Read default file as basis + string[] lines = File.ReadAllLines(defaultProfileFilePath); + + NameValueCollection nvEXIF = ParseFullExifData(sourceFile); + + // File should be Windows ANSI + using (TextWriter tw = new StreamWriter(sourceFile + ".pp3", false, new UTF8Encoding(false))) { + string section = ""; + + foreach (string line in lines) { + string l = line.Trim(); + if (!String.IsNullOrEmpty(line)) { + + if (l.StartsWith("[")) + section = l.Trim(new char[] { '[', ']' }); + else if (char.IsLetterOrDigit(l[0]) && l.Contains("=")) { + int valPos = l.IndexOf("=") + 1; + + string newValue = GetCorrectedSetting(nvEXIF, section + "/" + l.Substring(0, valPos - 1), l.Substring(valPos).Trim(), + fNumber, exposureSecs, focalLength, iso, lens, camera); + + // Merge in new value + l = l.Substring(0, valPos) + (newValue ?? ""); + } + } + + tw.WriteLine(l); + } + } + + } catch (Exception ex) { + Console.WriteLine("Error: " + ex.ToString()); // can be seen in the RT console window + + exitCode = 1; + } + + return exitCode; + } + + + static NameValueCollection ParseFullExifData(string filePath) { + NameValueCollection nv = new NameValueCollection(); + + string exifToolPath = ConfigurationManager.AppSettings["ExifToolPath"]; + if (!String.IsNullOrEmpty(exifToolPath)) { + ProcessStartInfo psi = new ProcessStartInfo(exifToolPath, "\"" + filePath + "\" -tab -short"); + psi.CreateNoWindow = false; + psi.UseShellExecute = false; + psi.StandardOutputEncoding = System.Text.Encoding.UTF8; + psi.RedirectStandardOutput = true; + + Process p = Process.Start(psi); + + using (StreamReader sr = p.StandardOutput) { + while (!sr.EndOfStream) { + string line = sr.ReadLine(); + if (line.Contains("\t")) { + string[] split = line.Split('\t'); + nv.Add(split[0], split[1]); + } + } + } + + p.WaitForExit(); + } + + return nv; + } + +#endregion + } + +#region DistortionCorrectProf + /// Holds a distortion correction profile for one lens. Uses sample points (focal length vs. dist. correction) as input. + class DistortionCorrectProf { + double[] adFocLen, adCorrect; + + /// Parses array to internal structure + /// Focal lengths + /// Correction factors + public DistortionCorrectProf(double[] focLen, double[] correct) { + if (focLen == null || correct == null || focLen.Length != correct.Length || focLen.Length < 2) + throw new Exception("DistortionCorrectProf inputs must be valid and of the same lengths, at least 2 points"); + + adFocLen = focLen; adCorrect = correct; + + for (int i = 0; i < adFocLen.Length - 1; i++) + if (adFocLen[i] >= adFocLen[i + 1]) throw new Exception("The distortion correction focal length points must be ordered!"); + } + + /// Calculates regression value of RT distortion amount for the given focal length. + /// Input focal length. + /// Distortion in RT format. + public string GetDistortionAmount(double focalLength) { + // if it's out of area (which should just happen with e.g. rounding errors), return flat defaults. + if (focalLength <= adFocLen[0]) return adCorrect[0].ToString("G", CultureInfo.InvariantCulture); + if (focalLength >= adFocLen[adFocLen.Length - 1]) return adCorrect[adFocLen.Length - 1].ToString("G", CultureInfo.InvariantCulture); + + for (int i = 0; i < adFocLen.Length - 1; i++) { + if (focalLength >= adFocLen[i] && focalLength < adFocLen[i + 1]) { + // from the sample curves taken so far, it it safe to take a simple linear interpolation here + double corr = adCorrect[i] + (adCorrect[i + 1] - adCorrect[i]) * (focalLength - adFocLen[i]) / (adFocLen[i + 1] - adFocLen[i]); + return corr.ToString("G3", CultureInfo.InvariantCulture); + } + } + + return ""; // should never happen + } + } +#endregion +} diff --git a/tools/RTProfileBuilderSample.exe.config b/tools/RTProfileBuilderSample.exe.config index 2dbb6b973..c054fce6c 100644 --- a/tools/RTProfileBuilderSample.exe.config +++ b/tools/RTProfileBuilderSample.exe.config @@ -1,8 +1,8 @@ - - - - - - - + + + + + + + diff --git a/tools/osx/Info.plist-bin.in b/tools/osx/Info.plist-bin.in index 20ce5a741..33abd4f7a 100644 --- a/tools/osx/Info.plist-bin.in +++ b/tools/osx/Info.plist-bin.in @@ -1,10 +1,10 @@ - - CFBundleName - RawTherapee-bin - CFBundleIdentifier - com.rawtherapee.rawtherapee - + + CFBundleName + RawTherapee-bin + CFBundleIdentifier + com.rawtherapee.rawtherapee + diff --git a/tools/osx/Info.plist.in b/tools/osx/Info.plist.in index e78db0cc4..eec1ab490 100644 --- a/tools/osx/Info.plist.in +++ b/tools/osx/Info.plist.in @@ -1,167 +1,167 @@ - - CFBundleDevelopmentRegion - English - CFBundleDisplayName - RawTherapee - CFBundleDocumentTypes - - - CFBundleTypeExtensions - - pp3 - PP3 - - CFBundleTypeIconFile - profile.icns - CFBundleTypeName - RawTherapee Profile Data - CFBundleTypeRole - Editor - LSIsAppleDefaultForType - - LSItemContentTypes - - com.rawtherapee.pp3 - - - - CFBundleTypeExtensions - - 3FR - 3fr - ARW - arw - CR2 - cr2 - CRF - crf - CRW - crw - DCR - dcr - DNG - dng - FFF - fff - IIQ - iiq - KDC - kdc - MEF - mef - MOS - mos - MRW - mrw - NEF - nef - NRW - nrw - ORF - orf - PEF - pef - RAF - raf - RAW - raw - RW2 - rw2 - RWZ - rwz - SR2 - sr2 - SRF - srf - SRW - srw - - CFBundleTypeMIMETypes - - image/raw - - CFBundleTypeName - Camera Raw - CFBundleTypeRole - Viewer - - - CFBundleTypeExtensions - - JPEG - jpeg - JPG - jpg - PNG - png - TIF - tif - TIFF - tiff - - CFBundleTypeName - Image - CFBundleTypeRole - Viewer - - - CFBundleExecutable - rawtherapee - CFBundleGetInfoString - @version@, Copyright © 2004-2010 Gábor Horváth, 2010-2017 RawTherapee Development Team - CFBundleIconFile - rawtherapee.icns - CFBundleIdentifier - com.rawtherapee.rawtherapee - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - RawTherapee - CFBundlePackageType - APPL - CFBundleShortVersionString - @shortVersion@ - CFBundleSignature - ???? - CFBundleVersion - @shortVersion@ - LSExecutableArchitectures - - @arch@ - - NSHighResolutionCapable - - NSHumanReadableCopyright - Copyright © 2004-2010 Gábor Horváth, 2010-2017 RawTherapee Development Team - UTExportedTypeDeclarations - - - UTTypeConformsTo - - public.data - - UTTypeDescription - RawTherapee Profile Data - UTTypeIconFile - Icons.icns - UTTypeIdentifier - com.rawtherapee.pp3 - UTTypeReferenceURL - http://www.rawtherapee.com/ - UTTypeTagSpecification - - com.apple.ostype - PP3 - public.filename-extension - - pp3 - PP3 - - - - - + + CFBundleDevelopmentRegion + English + CFBundleDisplayName + RawTherapee + CFBundleDocumentTypes + + + CFBundleTypeExtensions + + pp3 + PP3 + + CFBundleTypeIconFile + profile.icns + CFBundleTypeName + RawTherapee Profile Data + CFBundleTypeRole + Editor + LSIsAppleDefaultForType + + LSItemContentTypes + + com.rawtherapee.pp3 + + + + CFBundleTypeExtensions + + 3FR + 3fr + ARW + arw + CR2 + cr2 + CRF + crf + CRW + crw + DCR + dcr + DNG + dng + FFF + fff + IIQ + iiq + KDC + kdc + MEF + mef + MOS + mos + MRW + mrw + NEF + nef + NRW + nrw + ORF + orf + PEF + pef + RAF + raf + RAW + raw + RW2 + rw2 + RWZ + rwz + SR2 + sr2 + SRF + srf + SRW + srw + + CFBundleTypeMIMETypes + + image/raw + + CFBundleTypeName + Camera Raw + CFBundleTypeRole + Viewer + + + CFBundleTypeExtensions + + JPEG + jpeg + JPG + jpg + PNG + png + TIF + tif + TIFF + tiff + + CFBundleTypeName + Image + CFBundleTypeRole + Viewer + + + CFBundleExecutable + rawtherapee + CFBundleGetInfoString + @version@, Copyright © 2004-2010 Gábor Horváth, 2010-2017 RawTherapee Development Team + CFBundleIconFile + rawtherapee.icns + CFBundleIdentifier + com.rawtherapee.rawtherapee + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + RawTherapee + CFBundlePackageType + APPL + CFBundleShortVersionString + @shortVersion@ + CFBundleSignature + ???? + CFBundleVersion + @shortVersion@ + LSExecutableArchitectures + + @arch@ + + NSHighResolutionCapable + + NSHumanReadableCopyright + Copyright © 2004-2010 Gábor Horváth, 2010-2017 RawTherapee Development Team + UTExportedTypeDeclarations + + + UTTypeConformsTo + + public.data + + UTTypeDescription + RawTherapee Profile Data + UTTypeIconFile + Icons.icns + UTTypeIdentifier + com.rawtherapee.pp3 + UTTypeReferenceURL + http://www.rawtherapee.com/ + UTTypeTagSpecification + + com.apple.ostype + PP3 + public.filename-extension + + pp3 + PP3 + + + + + diff --git a/tools/osx/rt.entitlements b/tools/osx/rt.entitlements index 2236af138..082661401 100644 --- a/tools/osx/rt.entitlements +++ b/tools/osx/rt.entitlements @@ -1,20 +1,20 @@ - - application-identifier - com.rawtherapee.rawtherapee - com.apple.security.temporary-exception.files.absolute-path.read-write - - "/" - - com.apple.security.cs.allow-dyld-environment-variables - - com.apple.security.files.user-selected.read-write - - com.apple.security.app-sandbox - - com.apple.security.files.downloads.read-write - - + + application-identifier + com.rawtherapee.rawtherapee + com.apple.security.temporary-exception.files.absolute-path.read-write + + "/" + + com.apple.security.cs.allow-dyld-environment-variables + + com.apple.security.files.user-selected.read-write + + com.apple.security.app-sandbox + + com.apple.security.files.downloads.read-write + +