Add Ingo's clutstore_no_mmx.patch

Ingo has provided a solution for the strange Windows crash with
`_mm_cvtpu16_ps()`: It was not an alignment problem, but the use of
MMX instructions which led to the SEGV.

Now Ingo's solution omits MMX instructions altogether and is
nevertheless faster than the `_mm_set_ps()` workaround.

Many thanks to @heckflosse!
This commit is contained in:
Flössie
2016-05-11 20:01:17 +02:00
parent 1edfb0c6f7
commit f4d5c645de

View File

@@ -87,9 +87,18 @@ vfloat2 getClutValues(const AlignedBuffer<std::uint16_t>& clut_image, size_t ind
_mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_srli_si128(v_values, 8))) _mm_cvtepi32_ps(_mm_cvtepu16_epi32(_mm_srli_si128(v_values, 8)))
}; };
#else #else
vint lowval = _mm_shuffle_epi32(v_values, _MM_SHUFFLE(1, 0, 1, 0));
vint highval = _mm_shuffle_epi32(v_values, _MM_SHUFFLE(3, 2, 3, 2));
lowval = _mm_shufflelo_epi16(lowval, _MM_SHUFFLE(1, 1, 0, 0));
highval = _mm_shufflelo_epi16(highval, _MM_SHUFFLE(1, 1, 0, 0));
lowval = _mm_shufflehi_epi16(lowval, _MM_SHUFFLE(3, 3, 2, 2));
highval = _mm_shufflehi_epi16(highval, _MM_SHUFFLE(3, 3, 2, 2));
lowval = vandm(lowval, _mm_set1_epi32(0x0000ffff));
highval = vandm(highval, _mm_set1_epi32(0x0000ffff));
return { return {
_mm_cvtpu16_ps(_mm_movepi64_pi64(v_values)), _mm_cvtepi32_ps(lowval),
_mm_cvtpu16_ps(_mm_movepi64_pi64(_mm_srli_si128(v_values, 8))) _mm_cvtepi32_ps(highval)
}; };
#endif #endif
} }
@@ -261,12 +270,6 @@ void rtengine::HaldCLUT::splitClutFilename(
{ {
Glib::ustring basename = Glib::path_get_basename(filename); Glib::ustring basename = Glib::path_get_basename(filename);
Glib::ustring::size_type last_slash_pos = basename.rfind('/');
if (last_slash_pos == Glib::ustring::npos) {
last_slash_pos = basename.rfind('\\');
}
const Glib::ustring::size_type last_dot_pos = basename.rfind('.'); const Glib::ustring::size_type last_dot_pos = basename.rfind('.');
if (last_dot_pos != Glib::ustring::npos) { if (last_dot_pos != Glib::ustring::npos) {