#100 implemented simd functions for Matrix4x4 operators

AlBuSoft_RTX3070 · AlBuSoft_RTX3070 · commit ade6cb6dc72d · 2024-12-12T22:56:13.000+01:00
diff --git a/Libs/ecm/math/matrix.hpp b/Libs/ecm/math/matrix.hpp
@@ -1,3 +1,9 @@
+/*
+ * \file matrix.hpp
+ *
+ * \brief This header defines different 4x4 matrices.
+ */
+
 #pragma once
 #ifndef _ECM_MATRIX_H_
 #define _ECM_MATRIX_H_
@@ -8,16 +14,68 @@ namespace ecm::math
 {
 	// Matrix4x4
 
+	/**
+	 * A 4x4 matrix of single-precision floating-point values (float32).
+	 *
+	 * This type alias provides a more convenient name for
+	 * `Matrix4x4_Base<float32>`, making it easier to define and work with 4x4
+	 * matrices of floats.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4 = Matrix4x4_Base<float32>;
 
+	/**
+	 * A 4x4 matrix of single-precision floating-point values (float32) aligned
+	 * to a 16-byte boundary.
+	 *
+	 * Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
+	 * performance in SIMD operations or other vectorized instructions.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4A = ECM_ALIGN(16) Matrix4x4;
 
+	/**
+	 * A 4x4 matrix of 32-bit signed integers.
+	 *
+	 * This type alias provides a more convenient name for
+	 * `Matrix4x4_Base<int32>`, making it easier to define and work with 4x4
+	 * matrices of 32-bit integers.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4i = Matrix4x4_Base<int32>;
 
+	/**
+	 * A 4x4 matrix of 32-bit signed integers aligned to a 16-byte boundary.
+	 *
+	 * Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
+	 * performance in SIMD operations or other vectorized instructions.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4iA = ECM_ALIGN(16) Matrix4x4i;
 
+	/**
+	 * A 4x4 matrix of 32-bit unsigned integers.
+	 *
+	 * This type alias provides a more convenient name for
+	 * `Matrix4x4_Base<uint32>`, making it easier to define and work with 4x4
+	 * matrices of 32-bit unsigned integers.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4u = Matrix4x4_Base<uint32>;
 
+	/**
+	 * A 4x4 matrix of 32-bit unsigned integers aligned to a 16-byte boundary.
+	 *
+	 * Using `ECM_ALIGN(16)` ensures that the matrix data is aligned for optimal
+	 * performance in SIMD operations or other vectorized instructions.
+	 *
+	 * \since v1.0.0
+	 */
 	using Matrix4x4uA = ECM_ALIGN(16) Matrix4x4u;
 } // namespace ecm::math
 
diff --git a/Libs/ecm/math/matrix4x4.inl b/Libs/ecm/math/matrix4x4.inl
@@ -2,6 +2,7 @@
 
 #include "matrix4x4.hpp"
 #include "functions.hpp"
+#include "functions_simd.hpp"
 
 #include <limits>
 
@@ -221,8 +222,7 @@ namespace ecm::math
 	template<typename U, typename>
 	constexpr Matrix4x4_Base<T>& Matrix4x4_Base<T>::operator*=(Matrix4x4_Base<U> const& m)
 	{
-		// TODO: Use this code: return (*this = *this * m);
-		return *this;
+		return (*this = *this * m);
 	}
 
 	template<typename T>
@@ -448,11 +448,10 @@ namespace ecm::math
 			typename Matrix4x4_Base<U>::column_type const sourceB3 = m2[3];
 
 			Matrix4x4_Base<T> result;
-			// TODO: Implement this:
-			// result[0] = Fma(srca3, SplatW(srcb0), Fma(srca2, SplatZ(srcb0), Fma(srca1, SplatY(srcb0), srca0 * SplatX(srcb0))));
-			// result[1] = Fma(srca3, SplatW(srcb1), Fma(srca2, SplatZ(srcb1), Fma(srca1, SplatY(srcb1), srca0 * SplatX(srcb1))));
-			// result[2] = Fma(srca3, SplatW(srcb2), Fma(srca2, SplatZ(srcb2), Fma(srca1, SplatY(srcb2), srca0 * SplatX(srcb2))));
-			// result[3] = Fma(srca3, SplatW(srcb3), Fma(srca2, SplatZ(srcb3), Fma(srca1, SplatY(srcb3), srca0 * SplatX(srcb3))));
+			result[0] = Fma(sourceA3, SplatW(sourceB0), Fma(sourceA2, SplatZ(sourceB0), Fma(sourceA1, SplatY(sourceB0), sourceA0 * SplatX(sourceB0))));
+			result[1] = Fma(sourceA3, SplatW(sourceB1), Fma(sourceA2, SplatZ(sourceB1), Fma(sourceA1, SplatY(sourceB1), sourceA0 * SplatX(sourceB1))));
+			result[2] = Fma(sourceA3, SplatW(sourceB2), Fma(sourceA2, SplatZ(sourceB2), Fma(sourceA1, SplatY(sourceB2), sourceA0 * SplatX(sourceB2))));
+			result[3] = Fma(sourceA3, SplatW(sourceB3), Fma(sourceA2, SplatZ(sourceB3), Fma(sourceA1, SplatY(sourceB3), sourceA0 * SplatX(sourceB3))));
 			return result;
 		}
 		else