#include "Vec128.h"
// Performs eight packed single-precision comparisons on the four float lanes
// of a and b and stores one 128-bit lane mask per comparison into x[0..7].
//
// Results, in store order:
//   x[0] = (a == b)   x[1] = NOT (a == b)   x[2] = (a > b)   x[3] = (a >= b)
//   x[4] = (a <  b)   x[5] = (a <= b)       x[6] = (a < 0)   x[7] = (b >  0)
//
// x must point to at least eight writable 16-byte elements.
void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b) {
    // Working copy of the destination pointer; every st1 below post-increments
    // it by 16 bytes, so it is bound as a read-write operand.
    Vec128* dst = x;

    // The pointers are passed as explicit operands instead of assuming that
    // the arguments still live in x0/x1/x2 when the asm executes. Both source
    // loads happen before dst is first modified, so no early-clobber is needed.
    __asm volatile(
        "ld1   {v0.4s}, [%[pa]]            \n\t"   // v0 = a
        "ld1   {v1.4s}, [%[pb]]            \n\t"   // v1 = b

        "fcmeq v2.4s, v0.4s, v1.4s         \n\t"   // packed a == b
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "not   v2.16b, v2.16b              \n\t"   // packed a != b (inverted ==)
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmgt v2.4s, v0.4s, v1.4s         \n\t"   // packed a > b
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmge v2.4s, v0.4s, v1.4s         \n\t"   // packed a >= b
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmlt v2.4s, v0.4s, v1.4s         \n\t"   // packed a < b
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmle v2.4s, v0.4s, v1.4s         \n\t"   // packed a <= b
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmlt v2.4s, v0.4s, 0.0           \n\t"   // packed a < 0
        "st1   {v2.4s}, [%[dst]], 16       \n\t"

        "fcmgt v2.4s, v1.4s, 0.0           \n\t"   // packed b > 0
        "st1   {v2.4s}, [%[dst]], 16       \n\t"
        : [dst] "+r"(dst)
        : [pa] "r"(&a), [pb] "r"(&b)
        // "memory": the asm reads *a / *b and writes x[0..7] through pointers
        // the compiler cannot otherwise see being dereferenced.
        : "v0", "v1", "v2", "memory"
    );
}
// Performs eight packed double-precision comparisons on the two double lanes
// of a and b and stores one 128-bit lane mask per comparison into x[0..7].
//
// Results, in store order:
//   x[0] = (a == b)   x[1] = NOT (a == b)   x[2] = (a > b)   x[3] = (a >= b)
//   x[4] = (a <  b)   x[5] = (a <= b)       x[6] = (a < 0)   x[7] = (b >  0)
//
// x must point to at least eight writable 16-byte elements.
void PackedCompareF64_(Vec128 x[8], const Vec128& a, const Vec128& b) {
    // Working copy of the destination pointer; every st1 below post-increments
    // it by 16 bytes, so it is bound as a read-write operand.
    Vec128* dst = x;

    // The pointers are passed as explicit operands instead of assuming that
    // the arguments still live in x0/x1/x2 when the asm executes. Both source
    // loads happen before dst is first modified, so no early-clobber is needed.
    __asm volatile(
        "ld1   {v0.2d}, [%[pa]]            \n\t"   // v0 = a
        "ld1   {v1.2d}, [%[pb]]            \n\t"   // v1 = b

        "fcmeq v2.2d, v0.2d, v1.2d         \n\t"   // packed a == b
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "not   v2.16b, v2.16b              \n\t"   // packed a != b (inverted ==)
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmgt v2.2d, v0.2d, v1.2d         \n\t"   // packed a > b
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmge v2.2d, v0.2d, v1.2d         \n\t"   // packed a >= b
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmlt v2.2d, v0.2d, v1.2d         \n\t"   // packed a < b
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmle v2.2d, v0.2d, v1.2d         \n\t"   // packed a <= b
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmlt v2.2d, v0.2d, 0.0           \n\t"   // packed a < 0
        "st1   {v2.2d}, [%[dst]], 16       \n\t"

        "fcmgt v2.2d, v1.2d, 0.0           \n\t"   // packed b > 0
        "st1   {v2.2d}, [%[dst]], 16       \n\t"
        : [dst] "+r"(dst)
        : [pa] "r"(&a), [pb] "r"(&b)
        // "memory": the asm reads *a / *b and writes x[0..7] through pointers
        // the compiler cannot otherwise see being dereferenced.
        : "v0", "v1", "v2", "memory"
    );
}
|