diff --git a/Eule/Vector3.cpp b/Eule/Vector3.cpp index fd47e26..692e006 100644 --- a/Eule/Vector3.cpp +++ b/Eule/Vector3.cpp @@ -1,19 +1,927 @@ -#include "Vector3.h" - - - #include "Vector3.h" #include "Vector2.h" #include "Vector4.h" -template -Eule::Vector3::operator Eule::Vector2() const -{ - return Vector2(x, y); -} +#include "Math.h" +#include -template -Eule::Vector3::operator Eule::Vector4() const -{ - return Vector4(x, y, z, 0); +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ + +namespace Eule { + template + Vector3::operator Vector2() const + { + return Vector2(x, y); + } + + template + Vector3::operator Vector4() const + { + return Vector4(x, y, z, 0); + } + + // Good, optimized chad version for doubles + template<> + double Vector3::DotProduct(const Vector3& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components into registers + __m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x); + __m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x); + + // Define bitmask, and execute computation + const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot + __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + + // Retrieve result, and return it + float result[8]; + _mm256_storeu_ps(result, __dot); + + return result[0]; + +#else + return (x * other.x) + + (y * other.y) + + (z * other.z); +#endif + } + +// Slow, lame version for intcels + template<> + double Vector3::DotProduct(const Vector3& other) const + { + int iDot = (x * other.x) + (y * other.y) + (z * other.z); + return (double)iDot; + } + + + + // Good, optimized chad version for doubles + template<> + Vector3 Vector3::CrossProduct(const Vector3& other) const + { + Vector3 cp; + cp.x = (y * other.z) - (z * other.y); + cp.y = (z * other.x) - (x * other.z); + cp.z = (x * other.y) - (y * other.x); + + return cp; + } + + // Slow, lame version for intcels + template<> + Vector3 Vector3::CrossProduct(const Vector3& other) const + { + Vector3 cp; + cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y); + cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z); + cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x); + + return cp; + } + + +// Good, optimized chad version for doubles + template<> + double Vector3::SqrMagnitude() const + { + // x.DotProduct(x) == x.SqrMagnitude() + return DotProduct(*this); + } + +// Slow, lame version for intcels + template<> + double Vector3::SqrMagnitude() const + { + int iSqrMag = x*x + y*y + z*z; + return (double)iSqrMag; + } + + template + double Vector3::Magnitude() const + { + return sqrt(SqrMagnitude()); + } + + + + template<> + Vector3 Vector3::VectorScale(const Vector3& scalar) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector3( + result[0], + result[1], + result[2] + ); + +#else + + return Vector3( + x * scalar.x, + y * scalar.y, + z * scalar.z + ); + +#endif + } + + template<> + Vector3 Vector3::VectorScale(const Vector3& scalar) const + { + return Vector3( + x * scalar.x, + y * scalar.y, + z * scalar.z + ); + } + + + + template + Vector3 Vector3::Normalize() const + { + Vector3 norm(x, y, z); + norm.NormalizeSelf(); + + return norm; + } + +// Method to normalize a Vector3d + template<> + void Vector3::NormalizeSelf() + { + const double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + z = 0; + } + else + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(0, z, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + +#else + + x /= length; + y /= length; + z /= length; + +#endif + } + + return; + } + +// You can't normalize an int vector, ffs! +// But we need an implementation for T=int + template<> + void Vector3::NormalizeSelf() + { + std::cerr << "Stop normalizing int-vectors!!" << std::endl; + x = 0; + y = 0; + z = 0; + + return; + } + + + + template + bool Vector3::Similar(const Vector3& other, double epsilon) const + { + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) && + (::Eule::Math::Similar(z, other.z, epsilon)) + ; + } + + template + Vector3 Vector3::ToInt() const + { + return Vector3((int)x, (int)y, (int)z); + } + + template + Vector3 Vector3::ToDouble() const + { + return Vector3((double)x, (double)y, (double)z); + } + + template + T& Vector3::operator[](std::size_t idx) + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + default: + throw std::out_of_range("Array descriptor on Vector3 out of range!"); + } + } + + template + const T& Vector3::operator[](std::size_t idx) const + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + default: + throw std::out_of_range("Array descriptor on Vector3 out of range!"); + } + } + + + +// Good, optimized chad version for doubles + template<> + void Vector3::LerpSelf(const Vector3& other, double t) + { + const double it = 1.0 - t; // Inverse t + +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + +#else + + x = it*x + t*other.x; + y = it*y + t*other.y; + z = it*z + t*other.z; + +#endif + + return; + } + + + +// Slow, lame version for intcels + template<> + void Vector3::LerpSelf(const Vector3& other, double t) + { + const double it = 1.0 - t; // Inverse t + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + z = (int)(it * (double)z + t * (double)other.z); + + return; + } + + template<> + Vector3 Vector3::Lerp(const Vector3& other, double t) const + { + Vector3d copy(*this); + copy.LerpSelf(other, t); + + return copy; + } + + template<> + Vector3 Vector3::Lerp(const Vector3& other, double t) const + { + Vector3d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; + } + + + + template<> + Vector3 Vector3::operator+(const Vector3& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector3( + sum[0], + sum[1], + sum[2] + ); + +#else + + return Vector3( + x + other.x, + y + other.y, + z + other.z + ); +#endif + } + + template + Vector3 Vector3::operator+(const Vector3& other) const + { + return Vector3( + x + other.x, + y + other.y, + z + other.z + ); + } + + + + template<> + void Vector3::operator+=(const Vector3& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + +#else + + x += other.x; + y += other.y; + z += other.z; + +#endif + + return; + } + + template + void Vector3::operator+=(const Vector3& other) + { + x += other.x; + y += other.y; + z += other.z; + return; + } + + + + template<> + Vector3 Vector3::operator-(const Vector3& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector3( + diff[0], + diff[1], + diff[2] + ); + +#else + + return Vector3( + x - other.x, + y - other.y, + z - other.z + ); +#endif + } + + template + Vector3 Vector3::operator-(const Vector3& other) const + { + return Vector3( + x - other.x, + y - other.y, + z - other.z + ); + } + + + + template<> + void Vector3::operator-=(const Vector3& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + z = diff[2]; + +#else + + x -= other.x; + y -= other.y; + z -= other.z; + +#endif + + return; + } + + template + void Vector3::operator-=(const Vector3& other) + { + x -= other.x; + y -= other.y; + z -= other.z; + return; + } + + + + template<> + Vector3 Vector3::operator*(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector3( + prod[0], + prod[1], + prod[2] + ); + +#else + + return Vector3( + x * scale, + y * scale, + z * scale + ); + +#endif + } + + template + Vector3 Vector3::operator*(const T scale) const + { + return Vector3( + x * scale, + y * scale, + z * scale + ); + } + + + + template<> + void Vector3::operator*=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + +#else + + x *= scale; + y *= scale; + z *= scale; + +#endif + + return; + } + + template + void Vector3::operator*=(const T scale) + { + x *= scale; + y *= scale; + z *= scale; + return; + } + + + + template<> + Vector3 Vector3::operator/(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector3( + prod[0], + prod[1], + prod[2] + ); + +#else + + return Vector3( + x / scale, + y / scale, + z / scale + ); + +#endif + } + + template + Vector3 Vector3::operator/(const T scale) const + { + return Vector3( + x / scale, + y / scale, + z / scale + ); + } + + + + template<> + void Vector3::operator/=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + +#else + + x /= scale; + y /= scale; + z /= scale; + +#endif + return; + } + + template + void Vector3::operator/=(const T scale) + { + x /= scale; + y /= scale; + z /= scale; + return; + } + + +// Good, optimized chad version for doubles + template<> + Vector3 Vector3::operator*(const Matrix4x4& mat) const + { + Vector3 newVec; + +#ifndef _EULE_NO_INTRINSICS_ + // Store x, y, and z values + __m256d __vecx = _mm256_set1_pd(x); + __m256d __vecy = _mm256_set1_pd(y); + __m256d __vecz = _mm256_set1_pd(z); + + // Store matrix values + __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); + __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); + __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); + + // Multiply x, y, z and matrix values + __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); + __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); + __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); + + // Sum up the products + __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); + + // Store translation values + __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); + + // Add the translation values + __m256d __final = _mm256_add_pd(__sum, __translation); + + double dfinal[4]; + + _mm256_storeu_pd(dfinal, __final); + + newVec.x = dfinal[3]; + newVec.y = dfinal[2]; + newVec.z = dfinal[1]; + +#else + // Rotation, Scaling + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); + + // Translation + newVec.x += mat[0][3]; + newVec.y += mat[1][3]; + newVec.z += mat[2][3]; +#endif + + return newVec; + } + +// Slow, lame version for intcels + template<> + Vector3 Vector3::operator*(const Matrix4x4& mat) const + { + Vector3 newVec; + + // Rotation, Scaling + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); + + // Translation + newVec.x += mat[0][3]; + newVec.y += mat[1][3]; + newVec.z += mat[2][3]; + + return Vector3( + (int)newVec.x, + (int)newVec.y, + (int)newVec.z + ); + } + + + +// Good, optimized chad version for doubles + template<> + void Vector3::operator*=(const Matrix4x4& mat) + { +#ifndef _EULE_NO_INTRINSICS_ + // Store x, y, and z values + __m256d __vecx = _mm256_set1_pd(x); + __m256d __vecy = _mm256_set1_pd(y); + __m256d __vecz = _mm256_set1_pd(z); + + // Store matrix values + __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); + __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); + __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); + + // Multiply x, y, z and matrix values + __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); + __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); + __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); + + // Sum up the products + __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); + + // Store translation values + __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); + + // Add the translation values + __m256d __final = _mm256_add_pd(__sum, __translation); + + double dfinal[4]; + + _mm256_storeu_pd(dfinal, __final); + + x = dfinal[3]; + y = dfinal[2]; + z = dfinal[1]; + +#else + Vector3 buffer = *this; + x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z); + y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z); + z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z); + + // Translation + x += mat[0][3]; + y += mat[1][3]; + z += mat[2][3]; +#endif + + return; + } + + template + Vector3 Vector3::operator-() const + { + return Vector3( + -x, + -y, + -z + ); + } + + template + void Vector3::operator=(const Vector3& other) + { + x = other.x; + y = other.y; + z = other.z; + + return; + } + + template + void Vector3::operator=(Vector3&& other) noexcept + { + x = std::move(other.x); + y = std::move(other.y); + z = std::move(other.z); + + return; + } + +// Slow, lame version for intcels + template<> + void Vector3::operator*=(const Matrix4x4& mat) + { + Vector3 buffer(x, y, z); + + x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z)); + y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z)); + z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z)); + + // Translation + x += (int)mat[0][3]; + y += (int)mat[1][3]; + z += (int)mat[2][3]; + + return; + } + + + + template + bool Vector3::operator==(const Vector3& other) const + { + return + (x == other.x) && + (y == other.y) && + (z == other.z); + } + + template + bool Vector3::operator!=(const Vector3& other) const + { + return !operator==(other); + } + + template class Vector3; + template class Vector3; + + // Some handy predefines + template + const Vector3 Vector3::up(0, 1, 0); + template + const Vector3 Vector3::down(0, -1, 0); + template + const Vector3 Vector3::right(1, 0, 0); + template + const Vector3 Vector3::left(-1, 0, 0); + template + const Vector3 Vector3::forward(0, 0, 1); + template + const Vector3 Vector3::backward(0, 0, -1); + template + const Vector3 Vector3::one(1, 1, 1); + template + const Vector3 Vector3::zero(0, 0, 0); } diff --git a/Eule/Vector3.h b/Eule/Vector3.h index d9284b9..c8dd141 100644 --- a/Eule/Vector3.h +++ b/Eule/Vector3.h @@ -5,1019 +5,123 @@ #include #include "Matrix4x4.h" -#include "Math.h" -#include +namespace Eule { + template + class Vector2; -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif + template + class Vector4; -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ + /** Representation of a 3d vector. + * Contains a lot of utility methods. + */ + template + class Vector3 { + public: + Vector3() : x{0}, y{0}, z{0} {} -namespace Eule -{ - template class Vector2; - template class Vector4; + Vector3(T _x, T _y, T _z) : x{_x}, y{_y}, z{_z} {} - /** Representation of a 3d vector. - * Contains a lot of utility methods. - */ - template - class Vector3 - { - public: - Vector3() : x{ 0 }, y{ 0 }, z{ 0 } {} - Vector3(T _x, T _y, T _z) : x{ _x }, y{ _y }, z{ _z } {} - Vector3(const Vector3& other) = default; - Vector3(Vector3&& other) noexcept = default; + Vector3(const Vector3 &other) = default; - //! Will compute the dot product to another Vector3 - double DotProduct(const Vector3& other) const; + Vector3(Vector3 &&other) noexcept = default; - //! Will compute the cross product to another Vector3 - Vector3 CrossProduct(const Vector3& other) const; + //! Will compute the dot product to another Vector3 + double DotProduct(const Vector3 &other) const; - //! Will compute the square magnitude - double SqrMagnitude() const; + //! Will compute the cross product to another Vector3 + Vector3 CrossProduct(const Vector3 &other) const; - //! Will compute the magnitude - double Magnitude() const; + //! Will compute the square magnitude + double SqrMagnitude() const; - //! Will return the normalization of this vector - [[nodiscard]] Vector3 Normalize() const; + //! Will compute the magnitude + double Magnitude() const; - //! Will normalize this vector - void NormalizeSelf(); + //! Will return the normalization of this vector + [[nodiscard]] Vector3 Normalize() const; - //! Will scale self.n by scalar.n - [[nodiscard]] Vector3 VectorScale(const Vector3& scalar) const; + //! Will normalize this vector + void NormalizeSelf(); - //! Will lerp itself towards other by t - void LerpSelf(const Vector3& other, double t); + //! Will scale self.n by scalar.n + [[nodiscard]] Vector3 VectorScale(const Vector3 &scalar) const; - //! Will return a lerp result between this and another vector - [[nodiscard]] Vector3 Lerp(const Vector3& other, double t) const; + //! Will lerp itself towards other by t + void LerpSelf(const Vector3 &other, double t); - //! Will compare if two vectors are similar to a certain epsilon value - [[nodiscard]] bool Similar(const Vector3& other, double epsilon = 0.00001) const; + //! Will return a lerp result between this and another vector + [[nodiscard]] Vector3 Lerp(const Vector3 &other, double t) const; - //! Will convert this vector to a Vector3i - [[nodiscard]] Vector3 ToInt() const; + //! Will compare if two vectors are similar to a certain epsilon value + [[nodiscard]] bool Similar(const Vector3 &other, double epsilon = 0.00001) const; - //! Will convert this vector to a Vector3d - [[nodiscard]] Vector3 ToDouble() const; + //! Will convert this vector to a Vector3i + [[nodiscard]] Vector3 ToInt() const; - T& operator[](std::size_t idx); - const T& operator[](std::size_t idx) const; + //! Will convert this vector to a Vector3d + [[nodiscard]] Vector3 ToDouble() const; - Vector3 operator+(const Vector3& other) const; - void operator+=(const Vector3& other); - Vector3 operator-(const Vector3& other) const; - void operator-=(const Vector3& other); - Vector3 operator*(const T scale) const; - void operator*=(const T scale); - Vector3 operator/(const T scale) const; - void operator/=(const T scale); - Vector3 operator*(const Matrix4x4& mat) const; - void operator*=(const Matrix4x4& mat); - Vector3 operator-() const; + T &operator[](std::size_t idx); - operator Vector2() const; //! Conversion method - operator Vector4() const; //! Conversion method + const T &operator[](std::size_t idx) const; - void operator=(const Vector3& other); - void operator=(Vector3&& other) noexcept; + Vector3 operator+(const Vector3 &other) const; - bool operator==(const Vector3& other) const; - bool operator!=(const Vector3& other) const; + void operator+=(const Vector3 &other); - friend std::ostream& operator << (std::ostream& os, const Vector3& v) - { - return os << "[x: " << v.x << " y: " << v.y << " z: " << v.z << "]"; - } - friend std::wostream& operator << (std::wostream& os, const Vector3& v) - { - return os << L"[x: " << v.x << L" y: " << v.y << L" z: " << v.z << L"]"; - } + Vector3 operator-(const Vector3 &other) const; - T x; - T y; - T z; + void operator-=(const Vector3 &other); - // Some handy predefines - static const Vector3 up; - static const Vector3 down; - static const Vector3 right; - static const Vector3 left; - static const Vector3 forward; - static const Vector3 backward; - static const Vector3 one; - static const Vector3 zero; - }; + Vector3 operator*(const T scale) const; - typedef Vector3 Vector3i; - typedef Vector3 Vector3d; + void operator*=(const T scale); - // Good, optimized chad version for doubles -template<> -double Vector3::DotProduct(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ + Vector3 operator/(const T scale) const; - // Move vector components into registers - __m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x); - __m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x); + void operator/=(const T scale); - // Define bitmask, and execute computation - const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot - __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + Vector3 operator*(const Matrix4x4 &mat) const; - // Retrieve result, and return it - float result[8]; - _mm256_storeu_ps(result, __dot); + void operator*=(const Matrix4x4 &mat); - return result[0]; - - #else - return (x * other.x) + - (y * other.y) + - (z * other.z); - #endif -} - -// Slow, lame version for intcels -template<> -double Vector3::DotProduct(const Vector3& other) const -{ - int iDot = (x * other.x) + (y * other.y) + (z * other.z); - return (double)iDot; -} - - - -// Good, optimized chad version for doubles -template<> -Vector3 Vector3::CrossProduct(const Vector3& other) const -{ - Vector3 cp; - cp.x = (y * other.z) - (z * other.y); - cp.y = (z * other.x) - (x * other.z); - cp.z = (x * other.y) - (y * other.x); - - return cp; -} - -// Slow, lame version for intcels -template<> -Vector3 Vector3::CrossProduct(const Vector3& other) const -{ - Vector3 cp; - cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y); - cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z); - cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x); - - return cp; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector3::SqrMagnitude() const -{ - // x.DotProduct(x) == x.SqrMagnitude() - return DotProduct(*this); -} - -// Slow, lame version for intcels -template<> -double Vector3::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y + z*z; - return (double)iSqrMag; -} - -template -double Vector3::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - - -template<> -Vector3 Vector3::VectorScale(const Vector3& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector3( - result[0], - result[1], - result[2] - ); - - #else - - return Vector3( - x * scalar.x, - y * scalar.y, - z * scalar.z - ); - - #endif -} - -template<> -Vector3 Vector3::VectorScale(const Vector3& scalar) const -{ - return Vector3( - x * scalar.x, - y * scalar.y, - z * scalar.z - ); -} - - - -template -Vector3 Vector3::Normalize() const -{ - Vector3 norm(x, y, z); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector3d -template<> -void Vector3::NormalizeSelf() -{ - const double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - z = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(0, z, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x /= length; - y /= length; - z /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector3::NormalizeSelf() -{ - std::cerr << "Stop normalizing int-vectors!!" << std::endl; - x = 0; - y = 0; - z = 0; - - return; -} - - - -template -bool Vector3::Similar(const Vector3& other, double epsilon) const -{ - return - (::Eule::Math::Similar(x, other.x, epsilon)) && - (::Eule::Math::Similar(y, other.y, epsilon)) && - (::Eule::Math::Similar(z, other.z, epsilon)) - ; -} - -template -Vector3 Vector3::ToInt() const -{ - return Vector3((int)x, (int)y, (int)z); -} - -template -Vector3 Vector3::ToDouble() const -{ - return Vector3((double)x, (double)y, (double)z); -} - -template -T& Vector3::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - default: - throw std::out_of_range("Array descriptor on Vector3 out of range!"); - } -} - -template -const T& Vector3::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - default: - throw std::out_of_range("Array descriptor on Vector3 out of range!"); - } -} - - - -// Good, optimized chad version for doubles -template<> -void Vector3::LerpSelf(const Vector3& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - - #else - - x = it*x + t*other.x; - y = it*y + t*other.y; - z = it*z + t*other.z; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector3::LerpSelf(const Vector3& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - z = (int)(it * (double)z + t * (double)other.z); - - return; -} - -template<> -Vector3 Vector3::Lerp(const Vector3& other, double t) const -{ - Vector3d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector3 Vector3::Lerp(const Vector3& other, double t) const -{ - Vector3d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template<> -Vector3 Vector3::operator+(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector3( - sum[0], - sum[1], - sum[2] - ); - - #else - - return Vector3( - x + other.x, - y + other.y, - z + other.z - ); - #endif -} - -template -Vector3 Vector3::operator+(const Vector3& other) const -{ - return Vector3( - x + other.x, - y + other.y, - z + other.z - ); -} - - - -template<> -void Vector3::operator+=(const Vector3& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - - #else - - x += other.x; - y += other.y; - z += other.z; - - #endif - - return; -} - -template -void Vector3::operator+=(const Vector3& other) -{ - x += other.x; - y += other.y; - z += other.z; - return; -} - - - -template<> -Vector3 Vector3::operator-(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector3( - diff[0], - diff[1], - diff[2] - ); - - #else - - return Vector3( - x - other.x, - y - other.y, - z - other.z - ); - #endif -} - -template -Vector3 Vector3::operator-(const Vector3& other) const -{ - return Vector3( - x - other.x, - y - other.y, - z - other.z - ); -} - - - -template<> -void Vector3::operator-=(const Vector3& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - z = diff[2]; - - #else - - x -= other.x; - y -= other.y; - z -= other.z; - - #endif - - return; -} - -template -void Vector3::operator-=(const Vector3& other) -{ - x -= other.x; - y -= other.y; - z -= other.z; - return; -} - - - -template<> -Vector3 Vector3::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector3( - prod[0], - prod[1], - prod[2] - ); - - #else - - return Vector3( - x * scale, - y * scale, - z * scale - ); - - #endif -} - -template -Vector3 Vector3::operator*(const T scale) const -{ - return Vector3( - x * scale, - y * scale, - z * scale - ); -} - - - -template<> -void Vector3::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x *= scale; - y *= scale; - z *= scale; - - #endif - - return; -} - -template -void Vector3::operator*=(const T scale) -{ - x *= scale; - y *= scale; - z *= scale; - return; -} - - - -template<> -Vector3 Vector3::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector3( - prod[0], - prod[1], - prod[2] - ); - - #else - - return Vector3( - x / scale, - y / scale, - z / scale - ); - - #endif -} - -template -Vector3 Vector3::operator/(const T scale) const -{ - return Vector3( - x / scale, - y / scale, - z / scale - ); -} - - - -template<> -void Vector3::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x /= scale; - y /= scale; - z /= scale; - - #endif - return; -} - -template -void Vector3::operator/=(const T scale) -{ - x /= scale; - y /= scale; - z /= scale; - return; -} - - -// Good, optimized chad version for doubles -template<> -Vector3 Vector3::operator*(const Matrix4x4& mat) const -{ - Vector3 newVec; - - #ifndef _EULE_NO_INTRINSICS_ - // Store x, y, and z values - __m256d __vecx = _mm256_set1_pd(x); - __m256d __vecy = _mm256_set1_pd(y); - __m256d __vecz = _mm256_set1_pd(z); - - // Store matrix values - __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); - __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); - __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); - - // Multiply x, y, z and matrix values - __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); - __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); - __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); - - // Sum up the products - __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); - - // Store translation values - __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); - - // Add the translation values - __m256d __final = _mm256_add_pd(__sum, __translation); - - double dfinal[4]; - - _mm256_storeu_pd(dfinal, __final); - - newVec.x = dfinal[3]; - newVec.y = dfinal[2]; - newVec.z = dfinal[1]; - - #else - // Rotation, Scaling - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); - - // Translation - newVec.x += mat[0][3]; - newVec.y += mat[1][3]; - newVec.z += mat[2][3]; - #endif - - return newVec; -} - -// Slow, lame version for intcels -template<> -Vector3 Vector3::operator*(const Matrix4x4& mat) const -{ - Vector3 newVec; - - // Rotation, Scaling - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); - - // Translation - newVec.x += mat[0][3]; - newVec.y += mat[1][3]; - newVec.z += mat[2][3]; - - return Vector3( - (int)newVec.x, - (int)newVec.y, - (int)newVec.z - ); -} - - - -// Good, optimized chad version for doubles -template<> -void Vector3::operator*=(const Matrix4x4& mat) -{ - #ifndef _EULE_NO_INTRINSICS_ - // Store x, y, and z values - __m256d __vecx = _mm256_set1_pd(x); - __m256d __vecy = _mm256_set1_pd(y); - __m256d __vecz = _mm256_set1_pd(z); - - // Store matrix values - __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); - __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); - __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); - - // Multiply x, y, z and matrix values - __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); - __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); - __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); - - // Sum up the products - __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); - - // Store translation values - __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); - - // Add the translation values - __m256d __final = _mm256_add_pd(__sum, __translation); - - double dfinal[4]; - - _mm256_storeu_pd(dfinal, __final); - - x = dfinal[3]; - y = dfinal[2]; - z = dfinal[1]; - - #else - Vector3 buffer = *this; - x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z); - y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z); - z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z); - - // Translation - x += mat[0][3]; - y += mat[1][3]; - z += mat[2][3]; - #endif - - return; -} - -template -Vector3 Vector3::operator-() const -{ - return Vector3( - -x, - -y, - -z - ); -} - -template -void Vector3::operator=(const Vector3& other) -{ - x = other.x; - y = other.y; - z = other.z; - - return; -} - -template -void Vector3::operator=(Vector3&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - z = std::move(other.z); - - return; -} - -// Slow, lame version for intcels -template<> -void Vector3::operator*=(const Matrix4x4& mat) -{ - Vector3 buffer(x, y, z); - - x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z)); - y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z)); - z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z)); - - // Translation - x += (int)mat[0][3]; - y += (int)mat[1][3]; - z += (int)mat[2][3]; - - return; -} - - - -template -bool Vector3::operator==(const Vector3& other) const -{ - return - (x == other.x) && - (y == other.y) && - (z == other.z); -} - -template -bool Vector3::operator!=(const Vector3& other) const -{ - return !operator==(other); -} - -template class Vector3; -template class Vector3; - -// Some handy predefines -template -const Vector3 Vector3::up(0, 1, 0); -template -const Vector3 Vector3::down(0, -1, 0); -template -const Vector3 Vector3::right(1, 0, 0); -template -const Vector3 Vector3::left(-1, 0, 0); -template -const Vector3 Vector3::forward(0, 0, 1); -template -const Vector3 Vector3::backward(0, 0, -1); -template -const Vector3 Vector3::one(1, 1, 1); -template -const Vector3 Vector3::zero(0, 0, 0); + Vector3 operator-() const; + operator Vector2() const; //! Conversion method + operator Vector4() const; //! Conversion method + + void operator=(const Vector3 &other); + + void operator=(Vector3 &&other) noexcept; + + bool operator==(const Vector3 &other) const; + + bool operator!=(const Vector3 &other) const; + + friend std::ostream &operator<<(std::ostream &os, const Vector3 &v) { + return os << "[x: " << v.x << " y: " << v.y << " z: " << v.z << "]"; + } + + friend std::wostream &operator<<(std::wostream &os, const Vector3 &v) { + return os << L"[x: " << v.x << L" y: " << v.y << L" z: " << v.z << L"]"; + } + + T x; + T y; + T z; + + // Some handy predefines + static const Vector3 up; + static const Vector3 down; + static const Vector3 right; + static const Vector3 left; + static const Vector3 forward; + static const Vector3 backward; + static const Vector3 one; + static const Vector3 zero; + }; + + typedef Vector3 Vector3i; + typedef Vector3 Vector3d; }