| ch15_03/main.cpp |
#include <iostream>
#include <iomanip>
#include <cmath>
#include "Vec128.h"
using namespace std;
// Packed conversion routines implemented with NEON inline assembly in neon.cpp.
// Each routine reads its input vector(s) and stores the converted lanes
// starting at x[0]; only F64fromF32 also writes x[1].

// x[0] (4 x float) <- a (4 x signed 32-bit integer)
extern void F32fromI32(Vec128 x[2], const Vec128& a);
// x[0] (4 x signed 32-bit integer) <- a (4 x float)
extern void I32fromF32(Vec128 x[2], const Vec128& a);
// x[0] (2 x double) <- a (2 x signed 64-bit integer)
extern void F64fromI64(Vec128 x[2], const Vec128& a);
// x[0] (2 x signed 64-bit integer) <- a (2 x double)
extern void I64fromF64(Vec128 x[2], const Vec128& a);
// x[0] (4 x float) <- a (4 x unsigned 32-bit integer)
extern void F32fromU32(Vec128 x[2], const Vec128& a);
// x[0] (4 x unsigned 32-bit integer) <- a (4 x float)
extern void U32fromF32(Vec128 x[2], const Vec128& a);
// x[0] (2 x double) <- a (2 x unsigned 64-bit integer)
extern void F64fromU64(Vec128 x[2], const Vec128& a);
// x[0] (2 x unsigned 64-bit integer) <- a (2 x double)
extern void U64fromF64(Vec128 x[2], const Vec128& a);
// x[0] (4 x float) <- a (2 x double, low lanes) followed by b (2 x double, high lanes)
extern void F32fromF64(Vec128 x[2], const Vec128& a, const Vec128& b);
// x[0] (2 x double) <- low two floats of a; x[1] (2 x double) <- high two floats of a
extern void F64fromF32(Vec128 x[2], const Vec128& a);
void PackedConvertA(void) {
const char nl = '\n';
Vec128 x[2], a;
// F32_I32
a.m_I32[0] = 10;
a.m_I32[1] = -500;
a.m_I32[2] = 600;
a.m_I32[3] = -1024;
F32fromI32(x, a);
cout << "\nResults for CvtOp::F32_I32\n";
cout << "a: " << a.ToStringI32() << nl;
cout << "x[0]: " << x[0].ToStringF32() << nl;
// I32_F32
a.m_F32[0] = -1.25f;
a.m_F32[1] = 100.875f;
a.m_F32[2] = -200.0f;
a.m_F32[3] = (float)M_PI;
I32fromF32(x, a);
cout << "\nResults for CvtOp::I32_F32\n";
cout << "a: " << a.ToStringF32() << nl;
cout << "x[0]: " << x[0].ToStringI32() << nl;
// F64_I64
a.m_I64[0] = 1000;
a.m_I64[1] = -500000000000;
F64fromI64(x, a);
cout << "\nResults for CvtOp::F64_I64\n";
cout << "a: " << a.ToStringI64() << nl;
cout << "x[0]: " << x[0].ToStringF64() << nl;
// I64_F64
a.m_F64[0] = -122.66666667;
a.m_F64[1] = 1234567890123.75;
I64fromF64(x, a);
cout << "\nResults for CvtOp::I64_F64\n";
cout << "a: " << a.ToStringF64() << nl;
cout << "x[0]: " << x[0].ToStringI64() << nl;
}
// Exercises the unsigned-integer <-> floating-point packed conversions and
// prints each input vector next to the converted result held in x[0].
void PackedConvertB(void)
{
    const char nl = '\n';
    Vec128 x[2], a;

    // F32_U32: unsigned 32-bit integers -> single precision
    a.m_U32[0] = 10;
    a.m_U32[1] = 500;
    a.m_U32[2] = 600;
    a.m_U32[3] = 1024;
    F32fromU32(x, a);
    cout << "\nResults for CvtOp::F32_U32\n";
    cout << "a: " << a.ToStringU32() << nl;
    cout << "x[0]: " << x[0].ToStringF32() << nl;

    // U32_F32: single precision -> unsigned 32-bit integers
    a.m_F32[0] = 1.25f;
    a.m_F32[1] = 100.875f;
    a.m_F32[2] = 200.0f;
    a.m_F32[3] = static_cast<float>(M_PI);
    U32fromF32(x, a);
    cout << "\nResults for CvtOp::U32_F32\n";
    cout << "a: " << a.ToStringF32() << nl;
    cout << "x[0]: " << x[0].ToStringU32() << nl;

    // F64_U64: unsigned 64-bit integers -> double precision.
    // The input is written through the unsigned view so that it matches the
    // unsigned conversion under test and the ToStringU64() print below.
    a.m_U64[0] = 1000;
    a.m_U64[1] = 420000000000;
    F64fromU64(x, a);
    cout << "\nResults for CvtOp::F64_U64\n";
    cout << "a: " << a.ToStringU64() << nl;
    cout << "x[0]: " << x[0].ToStringF64() << nl;

    // U64_F64: double precision -> unsigned 64-bit integers
    a.m_F64[0] = 698.40;
    a.m_F64[1] = 1234567890123.75;
    U64fromF64(x, a);
    cout << "\nResults for CvtOp::U64_F64\n";
    cout << "a: " << a.ToStringF64() << nl;
    cout << "x[0]: " << x[0].ToStringU64() << nl;
}
void PackedConvertC(void)
{
const char nl = '\n';
Vec128 x[2], a, b;
// F32_F64
a.m_F64[0] = M_PI;
a.m_F64[1] = M_LOG10E;
b.m_F64[0] = -M_E;
b.m_F64[1] = M_LN2;
F32fromF64(x, a, b);
cout << "\nResults for CvtOp::F32_F64\n";
cout << "a: " << a.ToStringF64() << nl;
cout << "b: " << b.ToStringF64() << nl;
cout << "x[0]: " << x[0].ToStringF32() << nl;
// F64_F32
a.m_F32[0] = 1.0f / 9.0f;
a.m_F32[1] = 100.875f;
a.m_F32[2] = 200.0f;
a.m_F32[3] = (float)M_SQRT2;
F64fromF32(x, a);
cout << "\nResults for CvtOp::F64_F32\n";
cout << "a: " << a.ToStringF32() << nl;
cout << "x[0]: " << x[0].ToStringF64() << nl;
cout << "x[1]: " << x[1].ToStringF64() << nl;
}
// Program entry point: runs the three conversion demonstrations in order.
int main()
{
    void (*const demos[])(void) = { PackedConvertA, PackedConvertB, PackedConvertC };
    for (auto demo : demos) {
        demo();
    }
    return 0;
}
|
| ch15_03/neon.cpp |
#include "Vec128.h"
// Converts the four signed 32-bit integers in `a` to single-precision floats
// and stores the result in x[0]. x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void F32fromI32(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]   \n\t"   // v0 = a
        "scvtf  v1.4s, v0.4s        \n\t"   // float32 <- int32
        "st1    {v1.4s}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the four single-precision floats in `a` to signed 32-bit integers
// (FCVTNS: round to nearest, ties to even) and stores the result in x[0].
// x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void I32fromF32(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]   \n\t"   // v0 = a
        "fcvtns v1.4s, v0.4s        \n\t"   // int32 <- float32
        "st1    {v1.4s}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the two signed 64-bit integers in `a` to double-precision floats
// and stores the result in x[0]. x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void F64fromI64(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]   \n\t"   // v0 = a
        "scvtf  v1.2d, v0.2d        \n\t"   // float64 <- int64
        "st1    {v1.2d}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the two double-precision floats in `a` to signed 64-bit integers
// (FCVTNS: round to nearest, ties to even) and stores the result in x[0].
// x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void I64fromF64(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]   \n\t"   // v0 = a
        "fcvtns v1.2d, v0.2d        \n\t"   // int64 <- float64
        "st1    {v1.2d}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the four unsigned 32-bit integers in `a` to single-precision
// floats and stores the result in x[0]. x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void F32fromU32(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]   \n\t"   // v0 = a
        "ucvtf  v1.4s, v0.4s        \n\t"   // float32 <- uint32
        "st1    {v1.4s}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the four single-precision floats in `a` to unsigned 32-bit
// integers (FCVTNU: round to nearest, ties to even) and stores the result in
// x[0]. x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void U32fromF32(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]   \n\t"   // v0 = a
        "fcvtnu v1.4s, v0.4s        \n\t"   // uint32 <- float32
        "st1    {v1.4s}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the two unsigned 64-bit integers in `a` to double-precision floats
// and stores the result in x[0]. x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void F64fromU64(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]   \n\t"   // v0 = a
        "ucvtf  v1.2d, v0.2d        \n\t"   // float64 <- uint64
        "st1    {v1.2d}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Converts the two double-precision floats in `a` to unsigned 64-bit integers
// (FCVTNU: round to nearest, ties to even) and stores the result in x[0].
// x[1] is not touched.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void U64fromF64(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]   \n\t"   // v0 = a
        "fcvtnu v1.2d, v0.2d        \n\t"   // uint64 <- float64
        "st1    {v1.2d}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}
// Narrows four doubles to four floats and stores them in x[0]: the two
// doubles of `a` become float lanes 0-1, the two doubles of `b` become float
// lanes 2-3. x[1] is not touched.
//
// All three pointers are passed as asm operands instead of assuming they still
// live in x0/x1/x2, and "memory" is clobbered because the asm reads and writes
// memory the compiler cannot see through the operand list.
void F32fromF64(Vec128 x[2], const Vec128& a, const Vec128& b) {
    __asm volatile (
        "ld1    {v0.2d}, [%[lo]]    \n\t"   // v0 = a
        "ld1    {v2.2d}, [%[hi]]    \n\t"   // v2 = b
        "fcvtn  v1.2s, v0.2d        \n\t"   // lower two float32 lanes <- a
        "fcvtn2 v1.4s, v2.2d        \n\t"   // upper two float32 lanes <- b
        "st1    {v1.4s}, [%[dst]]   \n\t"   // x[0] = v1
        :
        : [dst] "r" (x), [lo] "r" (&a), [hi] "r" (&b)
        : "v0", "v1", "v2", "memory"
    );
}
// Widens the four floats in `a` to four doubles: float lanes 0-1 go to v1 and
// float lanes 2-3 go to v2, then both registers (32 bytes) are stored
// back-to-back starting at x, i.e. into x[0] and x[1].
// NOTE(review): this relies on Vec128 being exactly 16 bytes so that the
// second register lands on x[1] - confirm against Vec128.h.
//
// The pointers are passed as asm operands instead of assuming they still live
// in x0/x1, and "memory" is clobbered because the asm reads and writes memory
// the compiler cannot see through the operand list.
void F64fromF32(Vec128 x[2], const Vec128& a) {
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]           \n\t"   // v0 = a
        "fcvtl  v1.2d, v0.2s                \n\t"   // float64 <- lower two float32 lanes
        "fcvtl2 v2.2d, v0.4s                \n\t"   // float64 <- upper two float32 lanes
        "st1    {v1.2d, v2.2d}, [%[dst]]    \n\t"   // x[0] = v1, x[1] = v2
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "v2", "memory"
    );
}
|
| ch15_03/main.cpp の実行例 |
arm64@manet ch15_03 % g++ -I.. -std=c++11 -O main.cpp neon.cpp -o a.out arm64@manet ch15_03 % ./a.out Results for CvtOp::F32_I32 a: 10 -500 | 600 -1024 x[0]: 10.000000 -500.000000 | 600.000000 -1024.000000 Results for CvtOp::I32_F32 a: -1.250000 100.875000 | -200.000000 3.141593 x[0]: -1 101 | -200 3 Results for CvtOp::F64_I64 a: 1000 | -500000000000 x[0]: 1000.000000000000 | -500000000000.000000000000 Results for CvtOp::I64_F64 a: -122.666666670000 | 1234567890123.750000000000 x[0]: -123 | 1234567890124 Results for CvtOp::F32_U32 a: 10 500 | 600 1024 x[0]: 10.000000 500.000000 | 600.000000 1024.000000 Results for CvtOp::U32_F32 a: 1.250000 100.875000 | 200.000000 3.141593 x[0]: 1 101 | 200 3 Results for CvtOp::F64_U64 a: 1000 | 420000000000 x[0]: 1000.000000000000 | 420000000000.000000000000 Results for CvtOp::U64_F64 a: 698.400000000000 | 1234567890123.750000000000 x[0]: 698 | 1234567890124 Results for CvtOp::F32_F64 a: 3.141592653590 | 0.434294481903 b: -2.718281828459 | 0.693147180560 x[0]: 3.141593 0.434294 | -2.718282 0.693147 Results for CvtOp::F64_F32 a: 0.111111 100.875000 | 200.000000 1.414214 x[0]: 0.111111111939 | 100.875000000000 x[1]: 200.000000000000 | 1.414213538170 |