//-------------------------------------------------
// MatrixF32.h
//-------------------------------------------------
#pragma once
#include <iostream>
#include <iomanip>
#include <random>
#include <string>
#include <cstring>
#include <stdexcept>
#include "AlignedMem.h"
class MatrixF32 {
static const size_t c_Alignment = 16;
size_t m_NumRows;
size_t m_NumCols;
size_t m_NumElements;
size_t m_DataSize;
float* m_Data;
int m_OstreamW;
std::string m_OstreamDelim;
bool IsConforming(size_t num_rows, size_t num_cols) const {
return m_NumRows == num_rows && m_NumCols == num_cols;
}
void Allocate(size_t num_rows, size_t num_cols) {
m_NumRows = num_rows;
m_NumCols = num_cols;
m_NumElements = m_NumRows * m_NumCols;
m_DataSize = m_NumElements * sizeof(float);
if (m_NumElements == 0)
m_Data = nullptr;
else
m_Data = (float*)AlignedMem::Allocate(m_DataSize, c_Alignment);
SetOstream(10, " ");
}
void Cleanup(void) {
m_NumRows = m_NumCols = m_NumElements = m_DataSize = 0;
m_Data = nullptr;
}
void Release(void) {
if (m_Data != nullptr)
AlignedMem::Release(m_Data);
Cleanup();
}
public:
MatrixF32(void) {
Allocate(0, 0);
}
MatrixF32(size_t num_rows, size_t num_cols) {
Allocate(num_rows, num_cols);
Fill(0);
}
MatrixF32(size_t num_rows, size_t num_cols, bool set_identity) {
Allocate(num_rows, num_cols);
Fill(0);
if (set_identity && m_NumRows == m_NumCols) {
for (size_t i = 0; i < num_rows; i++)
m_Data[i * m_NumCols + i] = 1.0f;
}
}
MatrixF32(const MatrixF32& mat) {
Allocate(mat.m_NumRows, mat.m_NumCols);
memcpy(m_Data, mat.m_Data, m_DataSize);
SetOstream(mat.m_OstreamW, mat.m_OstreamDelim);
}
MatrixF32(MatrixF32&& mat) {
m_NumRows = mat.m_NumRows;
m_NumCols = mat.m_NumCols;
m_NumElements = mat.m_NumElements;
m_DataSize = mat.m_DataSize;
m_Data = mat.m_Data;
m_OstreamW = mat.m_OstreamW;
m_OstreamDelim = mat.m_OstreamDelim;
mat.Cleanup();
}
~MatrixF32() {
Release();
}
MatrixF32& operator = (const MatrixF32& mat) {
if (this != &mat) {
if (!IsConforming(*this, mat)) {
Release();
Allocate(mat.m_NumRows, mat.m_NumCols);
}
memcpy(m_Data, mat.m_Data, m_DataSize);
SetOstream(mat.m_OstreamW, mat.m_OstreamDelim);
}
return *this;
}
MatrixF32& operator = (MatrixF32&& mat) {
Release();
m_NumRows = mat.m_NumRows;
m_NumCols = mat.m_NumCols;
m_NumElements = mat.m_NumElements;
m_DataSize = mat.m_DataSize;
m_Data = mat.m_Data;
SetOstream(mat.m_OstreamW, mat.m_OstreamDelim);
mat.Cleanup();
return *this;
}
friend bool operator == (const MatrixF32& mat1, const MatrixF32& mat2) {
if (!IsConforming(mat1, mat2))
return false;
return (memcmp(mat1.m_Data, mat2.m_Data, mat1.m_DataSize) == 0) ? true : false;
}
friend bool operator != (const MatrixF32& mat1, const MatrixF32& mat2) {
if (!IsConforming(mat1, mat2))
return false;
return (memcmp(mat1.m_Data, mat2.m_Data, mat1.m_DataSize) != 0) ? true : false;
}
float* Data(void) { return m_Data; }
const float* Data(void) const { return m_Data; }
size_t GetNumRows(void) const { return m_NumRows; }
size_t GetNumCols(void) const { return m_NumCols; }
size_t GetNumElements(void) const { return m_NumElements; }
bool IsSquare(void) const { return m_NumRows == m_NumCols; }
friend MatrixF32 operator + (const MatrixF32& mat1, const MatrixF32& mat2) {
if (!IsConforming(mat1, mat2))
throw std::runtime_error("Non-conforming operands: operator +");
MatrixF32 result(mat1.m_NumRows, mat1.m_NumCols);
for (size_t i = 0; i < result.m_NumElements; i++)
result.m_Data[i] = mat1.m_Data[i] + mat2.m_Data[i];
return result;
}
friend MatrixF32 operator * (const MatrixF32& mat1, const MatrixF32& mat2) {
if (mat1.m_NumCols != mat2.m_NumRows)
throw std::runtime_error("Non-conforming operands: operator *");
size_t m = mat1.m_NumCols;
MatrixF32 result(mat1.m_NumRows, mat2.m_NumCols);
for (size_t i = 0; i < result.m_NumRows; i++) {
for (size_t j = 0; j < result.m_NumCols; j++) {
float sum = 0;
for (size_t k = 0; k < m; k++) {
float val = mat1.m_Data[i * mat1.m_NumCols + k] * mat2.m_Data[k * mat2.m_NumCols + j];
sum += val;
}
result.m_Data[i * result.m_NumCols + j] = sum;
}
}
return result;
}
//
// For some operations, static functions are used instead of overloaded
// operators in order to avoid inaccurate benchmark performance comparisons.
//
static void Add(MatrixF32& result, const MatrixF32& mat1, const MatrixF32& mat2) {
if (!IsConforming(result, mat1) || !IsConforming(mat1, mat2))
throw std::runtime_error("Non-conforming operands: MatrixF32::Add");
for (size_t i = 0; i < result.m_NumElements; i++)
result.m_Data[i] = mat1.m_Data[i] + mat2.m_Data[i];
}
static void Mul(MatrixF32& result, const MatrixF32& mat1, const MatrixF32& mat2) {
if (mat1.m_NumCols != mat2.m_NumRows)
throw std::runtime_error("Non-conforming operands: MatrixF32::Mul");
if (result.m_NumRows != mat1.m_NumRows || result.m_NumCols != mat2.m_NumCols)
throw std::runtime_error("Invalid matrix size: MatrixF32::Mul");
size_t m = mat1.m_NumCols;
for (size_t i = 0; i < result.m_NumRows; i++) {
for (size_t j = 0; j < result.m_NumCols; j++) {
float sum = 0;
for (size_t k = 0; k < m; k++) {
float val = mat1.m_Data[i * mat1.m_NumCols + k] * mat2.m_Data[k * mat2.m_NumCols + j];
sum += val;
}
result.m_Data[i * result.m_NumCols + j] = sum;
}
}
}
static void Mul4x4(MatrixF32& result, const MatrixF32& mat1, const MatrixF32& mat2) {
const size_t nrows = 4;
const size_t ncols = 4;
const size_t m = 4;
for (size_t i = 0; i < nrows; i++) {
for (size_t j = 0; j < ncols; j++) {
float sum = 0;
for (size_t k = 0; k < m; k++) {
float val = mat1.m_Data[i * mat1.m_NumCols + k] * mat2.m_Data[k * mat2.m_NumCols + j];
sum += val;
}
result.m_Data[i * result.m_NumCols + j] = sum;
}
}
}
static void MulScalar(MatrixF32& result, const MatrixF32& mat, float val) {
if (!IsConforming(result, mat))
throw std::runtime_error("Non-conforming operands: MatrixF32::MulScalar");
for (size_t i = 0; i < result.m_NumElements; i++)
result.m_Data[i] = mat.m_Data[i] * val;
}
static void Transpose(MatrixF32& result, const MatrixF32& mat1) {
if (result.m_NumRows != mat1.m_NumCols || result.m_NumCols != mat1.m_NumRows)
throw std::runtime_error("Non-conforming operands: MatrixF32::Transpose");
for (size_t i = 0; i < result.m_NumRows; i++) {
for (size_t j = 0; j < result.m_NumCols; j++)
result.m_Data[i * result.m_NumCols + j] = mat1.m_Data[j * mat1.m_NumCols + i];
}
}
static bool IsConforming(const MatrixF32& mat1, const MatrixF32& mat2) {
return mat1.m_NumRows == mat2.m_NumRows && mat1.m_NumCols == mat2.m_NumCols;
}
void Fill(float val) {
for (size_t i = 0; i < m_NumElements; i++)
m_Data[i] = val;
}
void FillRandomInt(unsigned int seed_val, int min_val, int max_val, bool exclude_zero) {
std::uniform_int_distribution<> dist {min_val, max_val};
std::mt19937 rng {seed_val};
if (exclude_zero) {
for (size_t i = 0; i < m_NumElements; i++) {
int val;
while ((val = dist(rng)) == 0) {
}
m_Data[i] = (float)val;
}
} else {
for (size_t i = 0; i < m_NumElements; i++)
m_Data[i] = (float)dist(rng);
}
}
void RoundToZero(float epsilon) {
for (size_t i = 0; i < m_NumElements; i++) {
if (fabs(m_Data[i]) < epsilon)
m_Data[i] = 0;
}
}
void RoundToI(float epsilon) {
for (size_t i = 0; i < m_NumRows; i++) {
for (size_t j = 0; j < m_NumCols; j++) {
size_t k = i * m_NumCols + j;
if (i != j) {
if (fabs(m_Data[k]) < epsilon)
m_Data[k] = 0;
} else {
if (fabs(m_Data[k] - 1.0f) < epsilon)
m_Data[k] = 1.0f;
}
}
}
}
void SetCol(size_t col, const float* vals) {
if (col >= m_NumCols)
throw std::runtime_error("Invalid column index: MatrixF32::SetCol()");
for (size_t i = 0; i < m_NumRows; i++)
m_Data[i * m_NumCols + col] = vals[i];
}
void SetRow(size_t row, const float* vals) {
if (row >= m_NumRows)
throw std::runtime_error("Invalid row index: MatrixF32::SetRow()");
for (size_t j = 0; j < m_NumCols; j++)
m_Data[row * m_NumCols + j] = vals[j];
}
void SetI(void) {
if (!IsSquare())
throw std::runtime_error("Square matrix required: MatrixF32::SetI()");
for (size_t i = 0; i < m_NumRows; i++) {
for (size_t j = 0; j < m_NumCols; j++)
m_Data[i * m_NumCols + j] = (i == j) ? 1.0f : 0.0f;
}
}
void SetOstream(int w, const std::string& delim){
m_OstreamW = w;
m_OstreamDelim = delim;
}
float Trace(void) const {
if (!IsSquare())
throw std::runtime_error("Square matrix required: MatrixF32::Trace()");
float sum = 0;
for (size_t i = 0; i < m_NumRows; i++)
sum += m_Data[i * m_NumCols + i];
return sum;
}
float Trace4x4(void) const {
if (m_NumRows != 4 || m_NumCols != 4)
throw std::runtime_error("4x4 matrix required: MatrixF32::Trace()");
float sum = m_Data[0] + m_Data[5] + m_Data[10] + m_Data[15];
return sum;
}
friend std::ostream& operator << (std::ostream& os, const MatrixF32& mat) {
for (size_t i = 0; i < mat.m_NumRows; i++) {
for (size_t j = 0; j < mat.m_NumCols; j++) {
os << std::setw(mat.m_OstreamW) << mat.m_Data[i * mat.m_NumCols + j];
if (j + 1 < mat.m_NumCols)
os << mat.m_OstreamDelim;
}
os << std::endl;
}
return os;
}
};
|