1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
--- onscripter-jh/simd/int8x16.inl 2021-02-19 19:43:13.985431821 +0800
+++ onscripter-jh.new/simd/int8x16.inl 2021-02-19 19:44:53.623835170 +0800
@@ -105,12 +105,12 @@
inline void store_u_32(void* m, uint8x16 a) {
#ifdef USE_SIMD_X86_SSE2
-#if !defined(__clang_major__) || __clang_major__ >= 8
- _mm_storeu_si32(reinterpret_cast<__m128i*>(m), a);
-#else
- // _mm_storeu_si32 is unavailable in Clang 7;
+//#if !defined(__clang_major__) || __clang_major__ >= 8
+// _mm_storeu_si32(reinterpret_cast<__m128i*>(m), a);
+//#else
+// // _mm_storeu_si32 is unavailable in Clang 7;
_mm_store_ss(reinterpret_cast<float*>(m), _mm_castsi128_ps(a));
-#endif
+//#endif
#elif USE_SIMD_ARM_NEON
vst1q_lane_u32(reinterpret_cast<uint32_t*>(m), a, 1);
#endif
@@ -139,4 +139,4 @@
return vmovl_u8(vget_low_u8(a));
#endif
}
-}
\ No newline at end of file
+}
--- onscripter-jh/simd/int8x16.inl 2021-02-20 18:20:15.854029096 +0800
+++ onscripter-jh.new/simd/int8x16.inl 2021-02-20 18:21:21.350724843 +0800
@@ -112,7 +112,7 @@
_mm_store_ss(reinterpret_cast<float*>(m), _mm_castsi128_ps(a));
//#endif
#elif USE_SIMD_ARM_NEON
- vst1q_lane_u32(reinterpret_cast<uint32_t*>(m), a, 1);
+ vst1q_lane_u32(reinterpret_cast<uint32_t*>(m), vreinterpretq_u32_u8(a), 1);
#endif
}
|