Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit ade6cb6

Browse files
AlBuSoft_RTX3070AlBuSoft_RTX3070
authored and committed
#100 implemented simd functions for Matrix4x4 operators
1 parent eb44a6b commit ade6cb6

File tree

2 files changed

+64
-7
lines changed

2 files changed

+64
-7
lines changed

Libs/ecm/math/matrix.hpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
/*
2+
* \file matrix.hpp
3+
*
4+
* \brief This header defines different 4x4 matrix types.
5+
*/
6+
17
#pragma once
28
#ifndef _ECM_MATRIX_H_
39
#define _ECM_MATRIX_H_
@@ -8,16 +14,68 @@ namespace ecm::math
814
{
915
// Matrix4x4
1016

17+
/**
18+
* A 4x4 matrix of single-precision floating-point values (float32).
19+
*
20+
* This type alias provides a more convenient name for
21+
* `Matrix4x4_Base<float32>`, making it easier to define and work with 4x4
22+
* matrices of floats.
23+
*
24+
* \since v1.0.0
25+
*/
1126
using Matrix4x4 = Matrix4x4_Base<float32>;
1227

28+
/**
29+
* A 4x4 matrix of single-precision floating-point values (float32) aligned
30+
* to a 16-byte boundary.
31+
*
32+
* Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
33+
* performance in SIMD operations or other vectorized instructions.
34+
*
35+
* \since v1.0.0
36+
*/
1337
using Matrix4x4A = ECM_ALIGN(16) Matrix4x4;
1438

39+
/**
40+
* A 4x4 matrix of 32-bit signed integers.
41+
*
42+
* This type alias provides a more convenient name for
43+
* `Matrix4x4_Base<int32>`, making it easier to define and work with 4x4
44+
* matrices of 32-bit integers.
45+
*
46+
* \since v1.0.0
47+
*/
1548
using Matrix4x4i = Matrix4x4_Base<int32>;
1649

50+
/**
51+
* A 4x4 matrix of 32-bit signed integers aligned to a 16-byte boundary.
52+
*
53+
* Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
54+
* performance in SIMD operations or other vectorized instructions.
55+
*
56+
* \since v1.0.0
57+
*/
1758
using Matrix4x4iA = ECM_ALIGN(16) Matrix4x4i;
1859

60+
/**
61+
* A 4x4 matrix of 32-bit unsigned integers.
62+
*
63+
* This type alias provides a more convenient name for
64+
* `Matrix4x4_Base<uint32>`, making it easier to define and work with 4x4
65+
* matrices of 32-bit unsigned integers.
66+
*
67+
* \since v1.0.0
68+
*/
1969
using Matrix4x4u = Matrix4x4_Base<uint32>;
2070

71+
/**
72+
* A 4x4 matrix of 32-bit unsigned integers aligned to a 16-byte boundary.
73+
*
74+
* Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
75+
* performance in SIMD operations or other vectorized instructions.
76+
*
77+
* \since v1.0.0
78+
*/
2179
using Matrix4x4uA = ECM_ALIGN(16) Matrix4x4u;
2280
} // namespace ecm::math
2381

Libs/ecm/math/matrix4x4.inl

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "matrix4x4.hpp"
44
#include "functions.hpp"
5+
#include "functions_simd.hpp"
56

67
#include <limits>
78

@@ -221,8 +222,7 @@ namespace ecm::math
221222
template<typename U, typename>
222223
constexpr Matrix4x4_Base<T>& Matrix4x4_Base<T>::operator*=(Matrix4x4_Base<U> const& m)
223224
{
224-
// TODO: Use this code: return (*this = *this * m);
225-
return *this;
225+
return (*this = *this * m);
226226
}
227227

228228
template<typename T>
@@ -448,11 +448,10 @@ namespace ecm::math
448448
typename Matrix4x4_Base<U>::column_type const sourceB3 = m2[3];
449449

450450
Matrix4x4_Base<T> result;
451-
// TODO: Implement this:
452-
// result[0] = Fma(srca3, SplatW(srcb0), Fma(srca2, SplatZ(srcb0), Fma(srca1, SplatY(srcb0), srca0 * SplatX(srcb0))));
453-
// result[1] = Fma(srca3, SplatW(srcb1), Fma(srca2, SplatZ(srcb1), Fma(srca1, SplatY(srcb1), srca0 * SplatX(srcb1))));
454-
// result[2] = Fma(srca3, SplatW(srcb2), Fma(srca2, SplatZ(srcb2), Fma(srca1, SplatY(srcb2), srca0 * SplatX(srcb2))));
455-
// result[3] = Fma(srca3, SplatW(srcb3), Fma(srca2, SplatZ(srcb3), Fma(srca1, SplatY(srcb3), srca0 * SplatX(srcb3))));
451+
result[0] = Fma(sourceA3, SplatW(sourceB0), Fma(sourceA2, SplatZ(sourceB0), Fma(sourceA1, SplatY(sourceB0), sourceA0 * SplatX(sourceB0))));
452+
result[1] = Fma(sourceA3, SplatW(sourceB1), Fma(sourceA2, SplatZ(sourceB1), Fma(sourceA1, SplatY(sourceB1), sourceA0 * SplatX(sourceB1))));
453+
result[2] = Fma(sourceA3, SplatW(sourceB2), Fma(sourceA2, SplatZ(sourceB2), Fma(sourceA1, SplatY(sourceB2), sourceA0 * SplatX(sourceB2))));
454+
// result[3] is accumulated exclusively from the components of sourceB3 (X, Y, Z and W splats), mirroring how result[0..2] each use only their own sourceB column.
result[3] = Fma(sourceA3, SplatW(sourceB3), Fma(sourceA2, SplatZ(sourceB3), Fma(sourceA1, SplatY(sourceB3), sourceA0 * SplatX(sourceB3))));
456455
return result;
457456
}
458457
else

0 commit comments

Comments
 (0)