#include <iostream>
#include <iomanip>
#include <random>
#include <cmath>
#include "mat.h"
#include "AlignedMem.h"
using namespace std;
#define EPS 1.0e-12f
#define ARM_ALIGN 16
extern bool MatVecMulF32_(Vec4x1F32* b, float m[4][4], Vec4x1F32* a, int n);
bool VecCompare(const Vec4x1F32* p, const Vec4x1F32* q)
{
return (fabs(p->W - q->W) < EPS) && (fabs(p->X - q->X) < EPS)
&& (fabs(p->Y - q->Y) < EPS) && (fabs(p->Z - q->Z) < EPS);
}
void InitVecArray(Vec4x1F32* a, size_t n)
{
uniform_int_distribution<> ui_dist {1, 500};
mt19937 rng {187};
for (size_t i = 0; i < n; i++)
{
a[i].W = (float)ui_dist(rng);
a[i].X = (float)ui_dist(rng);
a[i].Y = (float)ui_dist(rng);
a[i].Z = (float)ui_dist(rng);
}
if (n >= 4)
{
// Known values for test purposes
a[0].W = 5; a[0].X = 6; a[0].Y = 7; a[0].Z = 8;
a[1].W = 15; a[1].X = 16; a[1].Y = 17; a[1].Z = 18;
a[2].W = 25; a[2].X = 26; a[2].Y = 27; a[2].Z = 28;
a[3].W = 35; a[3].X = 36; a[3].Y = 37; a[3].Z = 38;
}
}
bool MatVecMulF32Cpp(Vec4x1F32* b, float m[4][4], Vec4x1F32* a, size_t n)
{
if (n == 0 || (n % 4) != 0)
return false;
if (!AlignedMem::IsAligned(a, ARM_ALIGN) || !AlignedMem::IsAligned(b, ARM_ALIGN))
return false;
for (size_t i = 0; i < n; i++)
{
b[i].W = m[0][0] * a[i].W + m[0][1] * a[i].X;
b[i].W += m[0][2] * a[i].Y + m[0][3] * a[i].Z;
b[i].X = m[1][0] * a[i].W + m[1][1] * a[i].X;
b[i].X += m[1][2] * a[i].Y + m[1][3] * a[i].Z;
b[i].Y = m[2][0] * a[i].W + m[2][1] * a[i].X;
b[i].Y += m[2][2] * a[i].Y + m[2][3] * a[i].Z;
b[i].Z = m[3][0] * a[i].W + m[3][1] * a[i].X;
b[i].Z += m[3][2] * a[i].Y + m[3][3] * a[i].Z;
}
return true;
}
void MatVecMulF32(void)
{
const char nl = '\n';
const size_t num_vec = 8;
alignas(ARM_ALIGN) float m[4][4]
{
10.0, 11.0, 12.0, 13.0,
20.0, 21.0, 22.0, 23.0,
30.0, 31.0, 32.0, 33.0,
40.0, 41.0, 42.0, 43.0
};
AlignedArray<Vec4x1F32> a_aa(num_vec, ARM_ALIGN);
AlignedArray<Vec4x1F32> b1_aa(num_vec, ARM_ALIGN);
AlignedArray<Vec4x1F32> b2_aa(num_vec, ARM_ALIGN);
Vec4x1F32* a = a_aa.Data();
Vec4x1F32* b1 = b1_aa.Data();
Vec4x1F32* b2 = b2_aa.Data();
InitVecArray(a, num_vec);
bool rc1 = MatVecMulF32Cpp(b1, m, a, num_vec);
bool rc2 = MatVecMulF32_(b2, m, a, num_vec);
cout << "Results for MatVecMulF32\n";
if (!rc1 || !rc2)
{
cout << "Invalid return code\n";
cout << " rc1 = " << boolalpha << rc1 << nl;
cout << " rc2 = " << boolalpha << rc2 << nl;
return;
}
const unsigned int w = 8;
cout << fixed << setprecision(1);
for (size_t i = 0; i < num_vec; i++)
{
cout << "Test case #" << i << '\n';
cout << "b1: ";
cout << " " << setw(w) << b1[i].W << ' ';
cout << " " << setw(w) << b1[i].X << ' ';
cout << " " << setw(w) << b1[i].Y << ' ';
cout << " " << setw(w) << b1[i].Z << nl;
cout << "b2: ";
cout << " " << setw(w) << b2[i].W << ' ';
cout << " " << setw(w) << b2[i].X << ' ';
cout << " " << setw(w) << b2[i].Y << ' ';
cout << " " << setw(w) << b2[i].Z << nl;
if (!VecCompare(&b1[i], &b2[i]))
{
cout << "Error - vector compare failed\n";
return;
}
}
}
int main()
{
MatVecMulF32();
// MatVecMulF32_BM();
return 0;
}
|