From 0538fce475ec1288f8f90123f1f424dc4141dc3a Mon Sep 17 00:00:00 2001 From: Leonetienne Date: Fri, 11 Feb 2022 14:16:02 +0100 Subject: [PATCH] Made Eule compile Vector2 and Vector4 classes --- Eule/Vector2.cpp | 723 +++++++++++++++++++++++++++++++++++++++- Eule/Vector2.h | 852 +++++------------------------------------------ Eule/Vector4.cpp | 836 +++++++++++++++++++++++++++++++++++++++++++++- Eule/Vector4.h | 814 -------------------------------------------- 4 files changed, 1624 insertions(+), 1601 deletions(-) diff --git a/Eule/Vector2.cpp b/Eule/Vector2.cpp index 746613f..25eddc8 100644 --- a/Eule/Vector2.cpp +++ b/Eule/Vector2.cpp @@ -2,14 +2,719 @@ #include "Vector3.h" #include "Vector4.h" -template -Eule::Vector2::operator Eule::Vector3() const -{ - return Vector3(x, y, 0); -} +#include "Math.h" +#include -template -Eule::Vector2::operator Eule::Vector4() const -{ - return Vector4(x, y, 0, 0); +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ + +namespace Eule { + + template + Vector2::operator Vector3() const + { + return Vector3(x, y, 0); + } + + template + Vector2::operator Vector4() const + { + return Vector4(x, y, 0, 0); + } + + // Good, optimized chad version for doubles + template<> + double Vector2::DotProduct(const Vector2& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components into registers + __m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x); + __m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x); + + // Define bitmask, and execute computation + const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot + __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + + // Retrieve result, and return it + float result[8]; + _mm256_storeu_ps(result, __dot); + + return result[0]; + +#else + return (x * other.x) + + (y * other.y); +#endif + } + + // Slow, lame version for intcels + template<> + double Vector2::DotProduct(const Vector2& other) const + { + int iDot = (x * other.x) + + (y * other.y); + + return (double)iDot; + } + + + + // Good, optimized chad version for doubles + template<> + double Vector2::CrossProduct(const Vector2& other) const + { + return (x * other.y) - + (y * other.x); + } + + // Slow, lame version for intcels + template<> + double Vector2::CrossProduct(const Vector2& other) const + { + int iCross = (x * other.y) - + (y * other.x); + + return (double)iCross; + } + + + + // Good, optimized chad version for doubles + template<> + double Vector2::SqrMagnitude() const + { + // x.DotProduct(x) == x.SqrMagnitude() + return DotProduct(*this); + } + + // Slow, lame version for intcels + template<> + double Vector2::SqrMagnitude() const + { + int iSqrMag = x*x + y*y; + return (double)iSqrMag; + } + + template + double Vector2::Magnitude() const + { + return sqrt(SqrMagnitude()); + } + + + template<> + Vector2 Vector2::VectorScale(const Vector2& scalar) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector2( + result[0], + result[1] + ); + +#else + + return Vector2( + x * scalar.x, + y * scalar.y + ); +#endif + } + + template<> + Vector2 Vector2::VectorScale(const Vector2& scalar) const + { + return Vector2( + x * scalar.x, + y * scalar.y + ); + } + + + template + Vector2 Vector2::Normalize() const + { + Vector2 norm(x, y); + norm.NormalizeSelf(); + + return norm; + } + + // Method to normalize a Vector2d + template<> + void Vector2::NormalizeSelf() + { + double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + } + else + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(0, 0, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + +#else + + x /= length; + y /= length; + +#endif + } + + return; + } + + // You can't normalize an int vector, ffs! + // But we need an implementation for T=int + template<> + void Vector2::NormalizeSelf() + { + x = 0; + y = 0; + + return; + } + + + // Good, optimized chad version for doubles + template<> + void Vector2::LerpSelf(const Vector2& other, double t) + { + const double it = 1.0 - t; // Inverse t + +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + +#else + + x = it * x + t * other.x; + y = it * y + t * other.y; + +#endif + + return; + } + + + +// Slow, lame version for intcels + template<> + void Vector2::LerpSelf(const Vector2& other, double t) + { + const double it = 1.0 - t; // Inverse t + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + + return; + } + + template<> + Vector2 Vector2::Lerp(const Vector2& other, double t) const + { + Vector2d copy(*this); + copy.LerpSelf(other, t); + + return copy; + } + + template<> + Vector2 Vector2::Lerp(const Vector2& other, double t) const + { + Vector2d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; + } + + + + template + T& Vector2::operator[](std::size_t idx) + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + default: + throw std::out_of_range("Array descriptor on Vector2 out of range!"); + } + } + + template + const T& Vector2::operator[](std::size_t idx) const + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + default: + throw std::out_of_range("Array descriptor on Vector2 out of range!"); + } + } + + template + bool Vector2::Similar(const Vector2& other, double epsilon) const + { + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) + ; + } + + template + Vector2 Vector2::ToInt() const + { + return Vector2((int)x, (int)y); + } + + template + Vector2 Vector2::ToDouble() const + { + return Vector2((double)x, (double)y); + } + + template<> + Vector2 Vector2::operator+(const Vector2& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector2( + sum[0], + sum[1] + ); + +#else + + return Vector2( + x + other.x, + y + other.y + ); +#endif + } + + template + Vector2 Vector2::operator+(const Vector2& other) const + { + return Vector2( + x + other.x, + y + other.y + ); + } + + + + template<> + void Vector2::operator+=(const Vector2& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + +#else + + x += other.x; + y += other.y; + +#endif + + return; + } + + template + void Vector2::operator+=(const Vector2& other) + { + x += other.x; + y += other.y; + return; + } + + + + template<> + Vector2 Vector2::operator-(const Vector2& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector2( + diff[0], + diff[1] + ); + +#else + + return Vector2( + x - other.x, + y - other.y + ); +#endif + } + + template + Vector2 Vector2::operator-(const Vector2& other) const + { + return Vector2( + x - other.x, + y - other.y + ); + } + + + + template<> + void Vector2::operator-=(const Vector2& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + +#else + + x -= other.x; + y -= other.y; + +#endif + + return; + } + + template + void Vector2::operator-=(const Vector2& other) + { + x -= other.x; + y -= other.y; + return; + } + + + + template<> + Vector2 Vector2::operator*(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector2( + prod[0], + prod[1] + ); + +#else + + return Vector2( + x * scale, + y * scale + ); + +#endif + } + + template + Vector2 Vector2::operator*(const T scale) const + { + return Vector2( + x * scale, + y * scale + ); + } + + + + template<> + void Vector2::operator*=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + +#else + + x *= scale; + y *= scale; + +#endif + + return; + } + + template + void Vector2::operator*=(const T scale) + { + x *= scale; + y *= scale; + return; + } + + + + template<> + Vector2 Vector2::operator/(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector2( + prod[0], + prod[1] + ); + +#else + + return Vector2( + x / scale, + y / scale + ); + +#endif + } + + template + Vector2 Vector2::operator/(const T scale) const + { + return Vector2( + x / scale, + y / scale + ); + } + + + + template<> + void Vector2::operator/=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + +#else + + x /= scale; + y /= scale; + +#endif + return; + } + + template + void Vector2::operator/=(const T scale) + { + x /= scale; + y /= scale; + return; + } + + + + template + void Vector2::operator=(const Vector2& other) + { + x = other.x; + y = other.y; + + return; + } + + template + void Vector2::operator=(Vector2&& other) noexcept + { + x = std::move(other.x); + y = std::move(other.y); + + return; + } + + template + bool Vector2::operator==(const Vector2& other) const + { + return + (x == other.x) && + (y == other.y); + } + + template + bool Vector2::operator!=(const Vector2& other) const + { + return !operator==(other); + } + + template + Vector2 Vector2::operator-() const + { + return Vector2( + -x, + -y + ); + } + + template class Vector2; + template class Vector2; + + // Some handy predefines + template + const Vector2 Vector2::up(0, 1); + template + const Vector2 Vector2::down(0, -1); + template + const Vector2 Vector2::right(1, 0); + template + const Vector2 Vector2::left(-1, 0); + template + const Vector2 Vector2::one(1, 1); + template + const Vector2 Vector2::zero(0, 0); } diff --git a/Eule/Vector2.h b/Eule/Vector2.h index 825cecb..810d522 100644 --- a/Eule/Vector2.h +++ b/Eule/Vector2.h @@ -1,804 +1,118 @@ #pragma once #include #include -#include "Math.h" -#include -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif +namespace Eule { + template + class Vector3; -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ + template + class Vector4; -namespace Eule -{ - template class Vector3; - template class Vector4; + /** Representation of a 2d vector. + * Contains a lot of utility methods. + */ + template + class Vector2 { + public: + Vector2() : x{0}, y{0} {} - /** Representation of a 2d vector. - * Contains a lot of utility methods. - */ - template - class Vector2 - { - public: - Vector2() : x{ 0 }, y{ 0 } {} - Vector2(T _x, T _y) : x{ _x }, y{ _y } {} - Vector2(const Vector2& other) = default; - Vector2(Vector2&& other) noexcept = default; + Vector2(T _x, T _y) : x{_x}, y{_y} {} - //! Will compute the dot product to another Vector2 - double DotProduct(const Vector2& other) const; + Vector2(const Vector2 &other) = default; - //! Will compute the cross product to another Vector2 - double CrossProduct(const Vector2& other) const; + Vector2(Vector2 &&other) noexcept = default; - //! Will compute the square magnitude - double SqrMagnitude() const; + //! Will compute the dot product to another Vector2 + double DotProduct(const Vector2 &other) const; - //! Will compute the magnitude - double Magnitude() const; + //! Will compute the cross product to another Vector2 + double CrossProduct(const Vector2 &other) const; - //! Will return the normalization of this vector - [[nodiscard]] Vector2 Normalize() const; + //! Will compute the square magnitude + double SqrMagnitude() const; - //! Will normalize this vector - void NormalizeSelf(); + //! Will compute the magnitude + double Magnitude() const; - //! Will scale self.n by scalar.n - Vector2 VectorScale(const Vector2& scalar) const; + //! Will return the normalization of this vector + [[nodiscard]] Vector2 Normalize() const; - //! Will lerp itself towards other by t - void LerpSelf(const Vector2& other, double t); + //! Will normalize this vector + void NormalizeSelf(); - //! Will return a lerp result between this and another vector - [[nodiscard]] Vector2 Lerp(const Vector2& other, double t) const; + //! Will scale self.n by scalar.n + Vector2 VectorScale(const Vector2 &scalar) const; - //! Will compare if two vectors are similar to a certain epsilon value - [[nodiscard]] bool Similar(const Vector2& other, double epsilon = 0.00001) const; + //! Will lerp itself towards other by t + void LerpSelf(const Vector2 &other, double t); - //! Will convert this vector to a Vector2i - [[nodiscard]] Vector2 ToInt() const; + //! Will return a lerp result between this and another vector + [[nodiscard]] Vector2 Lerp(const Vector2 &other, double t) const; - //! Will convert this vector to a Vector2d - [[nodiscard]] Vector2 ToDouble() const; + //! Will compare if two vectors are similar to a certain epsilon value + [[nodiscard]] bool Similar(const Vector2 &other, double epsilon = 0.00001) const; - T& operator[](std::size_t idx); - const T& operator[](std::size_t idx) const; + //! Will convert this vector to a Vector2i + [[nodiscard]] Vector2 ToInt() const; - Vector2 operator+(const Vector2& other) const; - void operator+=(const Vector2& other); - Vector2 operator-(const Vector2& other) const; - void operator-=(const Vector2& other); - Vector2 operator*(const T scale) const; - void operator*=(const T scale); - Vector2 operator/(const T scale) const; - void operator/=(const T scale); - Vector2 operator-() const; + //! Will convert this vector to a Vector2d + [[nodiscard]] Vector2 ToDouble() const; - operator Vector3() const; //! Conversion method - operator Vector4() const; //! Conversion method + T &operator[](std::size_t idx); - void operator=(const Vector2& other); - void operator=(Vector2&& other) noexcept; + const T &operator[](std::size_t idx) const; - bool operator==(const Vector2& other) const; - bool operator!=(const Vector2& other) const; + Vector2 operator+(const Vector2 &other) const; - friend std::ostream& operator<< (std::ostream& os, const Vector2& v) - { - return os << "[x: " << v.x << " y: " << v.y << "]"; - } - friend std::wostream& operator<< (std::wostream& os, const Vector2& v) - { - return os << L"[x: " << v.x << L" y: " << v.y << L"]"; - } + void operator+=(const Vector2 &other); - T x; - T y; + Vector2 operator-(const Vector2 &other) const; - // Some handy predefines - static const Vector2 up; - static const Vector2 down; - static const Vector2 right; - static const Vector2 left; - static const Vector2 one; - static const Vector2 zero; - }; + void operator-=(const Vector2 &other); - typedef Vector2 Vector2i; - typedef Vector2 Vector2d; + Vector2 operator*(const T scale) const; -// Good, optimized chad version for doubles -template<> -double Vector2::DotProduct(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ + void operator*=(const T scale); - // Move vector components into registers - __m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x); - __m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x); + Vector2 operator/(const T scale) const; - // Define bitmask, and execute computation - const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot - __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + void operator/=(const T scale); - // Retrieve result, and return it - float result[8]; - _mm256_storeu_ps(result, __dot); + Vector2 operator-() const; - return result[0]; + operator Vector3() const; //! Conversion method + operator Vector4() const; //! Conversion method + + void operator=(const Vector2 &other); + + void operator=(Vector2 &&other) noexcept; + + bool operator==(const Vector2 &other) const; + + bool operator!=(const Vector2 &other) const; + + friend std::ostream &operator<<(std::ostream &os, const Vector2 &v) { + return os << "[x: " << v.x << " y: " << v.y << "]"; + } + + friend std::wostream &operator<<(std::wostream &os, const Vector2 &v) { + return os << L"[x: " << v.x << L" y: " << v.y << L"]"; + } + + T x; + T y; + + // Some handy predefines + static const Vector2 up; + static const Vector2 down; + static const Vector2 right; + static const Vector2 left; + static const Vector2 one; + static const Vector2 zero; + }; + + typedef Vector2 Vector2i; + typedef Vector2 Vector2d; - #else - return (x * other.x) + - (y * other.y); - #endif -} - -// Slow, lame version for intcels -template<> -double Vector2::DotProduct(const Vector2& other) const -{ - int iDot = (x * other.x) + - (y * other.y); - - return (double)iDot; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector2::CrossProduct(const Vector2& other) const -{ - return (x * other.y) - - (y * other.x); -} - -// Slow, lame version for intcels -template<> -double Vector2::CrossProduct(const Vector2& other) const -{ - int iCross = (x * other.y) - - (y * other.x); - - return (double)iCross; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector2::SqrMagnitude() const -{ - // x.DotProduct(x) == x.SqrMagnitude() - return DotProduct(*this); -} - -// Slow, lame version for intcels -template<> -double Vector2::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y; - return (double)iSqrMag; -} - -template -double Vector2::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - -template<> -Vector2 Vector2::VectorScale(const Vector2& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector2( - result[0], - result[1] - ); - - #else - - return Vector2( - x * scalar.x, - y * scalar.y - ); - #endif -} - -template<> -Vector2 Vector2::VectorScale(const Vector2& scalar) const -{ - return Vector2( - x * scalar.x, - y * scalar.y - ); -} - - -template -Vector2 Vector2::Normalize() const -{ - Vector2 norm(x, y); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector2d -template<> -void Vector2::NormalizeSelf() -{ - double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(0, 0, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x /= length; - y /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector2::NormalizeSelf() -{ - x = 0; - y = 0; - - return; -} - - -// Good, optimized chad version for doubles -template<> -void Vector2::LerpSelf(const Vector2& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - - #else - - x = it * x + t * other.x; - y = it * y + t * other.y; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector2::LerpSelf(const Vector2& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - - return; -} - -template<> -Vector2 Vector2::Lerp(const Vector2& other, double t) const -{ - Vector2d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector2 Vector2::Lerp(const Vector2& other, double t) const -{ - Vector2d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template -T& Vector2::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - default: - throw std::out_of_range("Array descriptor on Vector2 out of range!"); - } -} - -template -const T& Vector2::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - default: - throw std::out_of_range("Array descriptor on Vector2 out of range!"); - } -} - -template -bool Vector2::Similar(const Vector2& other, double epsilon) const -{ - return - (::Eule::Math::Similar(x, other.x, epsilon)) && - (::Eule::Math::Similar(y, other.y, epsilon)) - ; -} - -template -Vector2 Vector2::ToInt() const -{ - return Vector2((int)x, (int)y); -} - -template -Vector2 Vector2::ToDouble() const -{ - return Vector2((double)x, (double)y); -} - -template<> -Vector2 Vector2::operator+(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector2( - sum[0], - sum[1] - ); - - #else - - return Vector2( - x + other.x, - y + other.y - ); - #endif -} - -template -Vector2 Vector2::operator+(const Vector2& other) const -{ - return Vector2( - x + other.x, - y + other.y - ); -} - - - -template<> -void Vector2::operator+=(const Vector2& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - - #else - - x += other.x; - y += other.y; - - #endif - - return; -} - -template -void Vector2::operator+=(const Vector2& other) -{ - x += other.x; - y += other.y; - return; -} - - - -template<> -Vector2 Vector2::operator-(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector2( - diff[0], - diff[1] - ); - - #else - - return Vector2( - x - other.x, - y - other.y - ); - #endif -} - -template -Vector2 Vector2::operator-(const Vector2& other) const -{ - return Vector2( - x - other.x, - y - other.y - ); -} - - - -template<> -void Vector2::operator-=(const Vector2& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - - #else - - x -= other.x; - y -= other.y; - - #endif - - return; -} - -template -void Vector2::operator-=(const Vector2& other) -{ - x -= other.x; - y -= other.y; - return; -} - - - -template<> -Vector2 Vector2::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector2( - prod[0], - prod[1] - ); - - #else - - return Vector2( - x * scale, - y * scale - ); - - #endif -} - -template -Vector2 Vector2::operator*(const T scale) const -{ - return Vector2( - x * scale, - y * scale - ); -} - - - -template<> -void Vector2::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x *= scale; - y *= scale; - - #endif - - return; -} - -template -void Vector2::operator*=(const T scale) -{ - x *= scale; - y *= scale; - return; -} - - - -template<> -Vector2 Vector2::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector2( - prod[0], - prod[1] - ); - - #else - - return Vector2( - x / scale, - y / scale - ); - - #endif -} - -template -Vector2 Vector2::operator/(const T scale) const -{ - return Vector2( - x / scale, - y / scale - ); -} - - - -template<> -void Vector2::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x /= scale; - y /= scale; - - #endif - return; -} - -template -void Vector2::operator/=(const T scale) -{ - x /= scale; - y /= scale; - return; -} - - - -template -void Vector2::operator=(const Vector2& other) -{ - x = other.x; - y = other.y; - - return; -} - -template -void Vector2::operator=(Vector2&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - - return; -} - -template -bool Vector2::operator==(const Vector2& other) const -{ - return - (x == other.x) && - (y == other.y); -} - -template -bool Vector2::operator!=(const Vector2& other) const -{ - return !operator==(other); -} - -template -Vector2 Vector2::operator-() const -{ - return Vector2( - -x, - -y - ); -} - -template class Vector2; -template class Vector2; - -// Some handy predefines -template -const Vector2 Vector2::up(0, 1); -template -const Vector2 Vector2::down(0, -1); -template -const Vector2 Vector2::right(1, 0); -template -const Vector2 Vector2::left(-1, 0); -template -const Vector2 Vector2::one(1, 1); -template -const Vector2 Vector2::zero(0, 0); } diff --git a/Eule/Vector4.cpp b/Eule/Vector4.cpp index 0810f7f..5fd23b6 100644 --- a/Eule/Vector4.cpp +++ b/Eule/Vector4.cpp @@ -2,14 +2,832 @@ #include "Vector2.h" #include "Vector3.h" -template -Eule::Vector4::operator Eule::Vector2() const -{ - return Vector2(x, y); -} +#include "Math.h" +#include -template -Eule::Vector4::operator Eule::Vector3() const -{ - return Vector3(x, y, z); +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ + +namespace Eule { + + template + Vector4::operator Vector2() const + { + return Vector2(x, y); + } + + template + Vector4::operator Vector3() const + { + return Vector3(x, y, z); + } + + + // Good, optimized chad version for doubles + template<> + double Vector4::SqrMagnitude() const + { + return (x * x) + + (y * y) + + (z * z) + + (w * w); + } + + // Slow, lame version for intcels + template<> + double Vector4::SqrMagnitude() const + { + int iSqrMag = x*x + y*y + z*z + w*w; + return (double)iSqrMag; + } + + template + double Vector4::Magnitude() const + { + return sqrt(SqrMagnitude()); + } + + + template<> + Vector4 Vector4::VectorScale(const Vector4& scalar) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector4( + result[0], + result[1], + result[2], + result[3] + ); + +#else + + return Vector4( + x * scalar.x, + y * scalar.y, + z * scalar.z, + w * scalar.w + ); +#endif + } + + + template<> + Vector4 Vector4::VectorScale(const Vector4& scalar) const + { + return Vector4( + x * scalar.x, + y * scalar.y, + z * scalar.z, + w * scalar.w + ); + } + + + + template + Vector4 Vector4::Normalize() const + { + Vector4 norm(x, y, z, w); + norm.NormalizeSelf(); + + return norm; + } + +// Method to normalize a Vector4d + template<> + void Vector4::NormalizeSelf() + { + double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + z = 0; + w = 0; + } + else + { +#ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(w, z, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + +#else + + x /= length; + y /= length; + z /= length; + w /= length; + +#endif + } + + return; + } + + // You can't normalize an int vector, ffs! + // But we need an implementation for T=int + template<> + void Vector4::NormalizeSelf() + { + x = 0; + y = 0; + z = 0; + w = 0; + + return; + } + + + + template + bool Vector4::Similar(const Vector4& other, double epsilon) const + { + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) && + (::Eule::Math::Similar(z, other.z, epsilon)) && + (::Eule::Math::Similar(w, other.w, epsilon)) + ; + } + + template + Vector4 Vector4::ToInt() const + { + return Vector4((int)x, (int)y, (int)z, (int)w); + } + + template + Vector4 Vector4::ToDouble() const + { + return Vector4((double)x, (double)y, (double)z, (double)w); + } + + template + T& Vector4::operator[](std::size_t idx) + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + default: + throw std::out_of_range("Array descriptor on Vector4 out of range!"); + } + } + + template + const T& Vector4::operator[](std::size_t idx) const + { + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + default: + throw std::out_of_range("Array descriptor on Vector4 out of range!"); + } + } + + + + // Good, optimized chad version for doubles + template<> + void Vector4::LerpSelf(const Vector4& other, double t) + { + const double it = 1.0 - t; // Inverse t + +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + w = sum[3]; + +#else + + x = it * x + t * other.x; + y = it * y + t * other.y; + z = it * z + t * other.z; + w = it * w + t * other.w; + +#endif + + return; + } + + + + // Slow, lame version for intcels + template<> + void Vector4::LerpSelf(const Vector4& other, double t) + { + const double it = 1.0 - t; + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + z = (int)(it * (double)z + t * (double)other.z); + w = (int)(it * (double)w + t * (double)other.w); + + return; + } + + template<> + Vector4 Vector4::Lerp(const Vector4& other, double t) const + { + Vector4d copy(*this); + copy.LerpSelf(other, t); + + return copy; + } + + template<> + Vector4 Vector4::Lerp(const Vector4& other, double t) const + { + Vector4d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; + } + + + + template<> + Vector4 Vector4::operator+(const Vector4& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector4( + sum[0], + sum[1], + sum[2], + sum[3] + ); + +#else + + return Vector4( + x + other.x, + y + other.y, + z + other.z, + w + other.w + ); +#endif + } + + template + Vector4 Vector4::operator+(const Vector4& other) const + { + return Vector4( + x + other.x, + y + other.y, + z + other.z, + w + other.w + ); + } + + + + template<> + void Vector4::operator+=(const Vector4& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + w = sum[3]; + +#else + + x += other.x; + y += other.y; + z += other.z; + w += other.w; + +#endif + + return; + } + + template + void Vector4::operator+=(const Vector4& other) + { + x += other.x; + y += other.y; + z += other.z; + w += other.w; + return; + } + + + + template<> + Vector4 Vector4::operator-(const Vector4& other) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector4( + diff[0], + diff[1], + diff[2], + diff[3] + ); + +#else + + return Vector4( + x - other.x, + y - other.y, + z - other.z, + w - other.w + ); +#endif + } + + template + Vector4 Vector4::operator-(const Vector4& other) const + { + return Vector4( + x - other.x, + y - other.y, + z - other.z, + w - other.w + ); + } + + + + template<> + void Vector4::operator-=(const Vector4& other) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + z = diff[2]; + w = diff[3]; + +#else + + x -= other.x; + y -= other.y; + z -= other.z; + w -= other.w; + +#endif + + return; + } + + template + void Vector4::operator-=(const Vector4& other) + { + x -= other.x; + y -= other.y; + z -= other.z; + w -= other.w; + return; + } + + + + template<> + Vector4 Vector4::operator*(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector4( + prod[0], + prod[1], + prod[2], + prod[3] + ); + +#else + + return Vector4( + x * scale, + y * scale, + z * scale, + w * scale + ); + +#endif + } + + template + Vector4 Vector4::operator*(const T scale) const + { + return Vector4( + x * scale, + y * scale, + z * scale, + w * scale + ); + } + + + + template<> + void Vector4::operator*=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + +#else + + x *= scale; + y *= scale; + z *= scale; + w *= scale; + +#endif + + return; + } + + template + void Vector4::operator*=(const T scale) + { + x *= scale; + y *= scale; + z *= scale; + w *= scale; + return; + } + + + + template<> + Vector4 Vector4::operator/(const double scale) const + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector4( + prod[0], + prod[1], + prod[2], + prod[3] + ); + +#else + + return Vector4( + x / scale, + y / scale, + z / scale, + w / scale + ); + +#endif + } + + template + Vector4 Vector4::operator/(const T scale) const + { + return Vector4( + x / scale, + y / scale, + z / scale, + w / scale + ); + } + + + + template<> + void Vector4::operator/=(const double scale) + { +#ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + +#else + + x /= scale; + y /= scale; + z /= scale; + w /= scale; + +#endif + return; + } + + template + void Vector4::operator/=(const T scale) + { + x /= scale; + y /= scale; + z /= scale; + w /= scale; + return; + } + + + + template + bool Vector4::operator==(const Vector4& other) const + { + return + (x == other.x) && + (y == other.y) && + (z == other.z) && + (w == other.w); + } + + + + // Good, optimized chad version for doubles + template<> + Vector4 Vector4::operator*(const Matrix4x4& mat) const + { + Vector4 newVec; + + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); + newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); + + return newVec; + } + + // Slow, lame version for intcels + template<> + Vector4 Vector4::operator*(const Matrix4x4& mat) const + { + Vector4 newVec; + + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); + newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); + + return Vector4( + (int)newVec.x, + (int)newVec.y, + (int)newVec.z, + (int)newVec.w + ); + } + + + + // Good, optimized chad version for doubles + template<> + void Vector4::operator*=(const Matrix4x4& mat) + { + Vector4 buffer = *this; + + // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] + // idk right now. check that if something doesn't work + x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w); + y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w); + z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w); + w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w); + + return; + } + + template + Vector4 Vector4::operator-() const + { + return Vector4( + -x, + -y, + -z, + -w + ); + } + + template + void Vector4::operator=(const Vector4& other) + { + x = other.x; + y = other.y; + z = other.z; + w = other.w; + + return; + } + + template + void Vector4::operator=(Vector4&& other) noexcept + { + x = std::move(other.x); + y = std::move(other.y); + z = std::move(other.z); + w = std::move(other.w); + + return; + } + + // Slow, lame version for intcels + template<> + void Vector4::operator*=(const Matrix4x4& mat) + { + Vector4 buffer(x, y, z, w); + + // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] + // idk right now. check that if something doesn't work + x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w)); + y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w)); + z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w)); + w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w)); + + return; + } + + template + bool Vector4::operator!=(const Vector4& other) const + { + return !operator==(other); + } + + template class Vector4; + template class Vector4; + + // Some handy predefines + template + const Vector4 Vector4::up(0, 1, 0, 0); + template + const Vector4 Vector4::down(0, -1, 0, 0); + template + const Vector4 Vector4::right(1, 0, 0, 0); + template + const Vector4 Vector4::left(-1, 0, 0, 0); + template + const Vector4 Vector4::forward(1, 0, 0, 0); + template + const Vector4 Vector4::backward(-1, 0, 0, 0); + template + const Vector4 Vector4::future(0, 0, 0, 1); + template + const Vector4 Vector4::past(0, 0, 0, -1); + template + const Vector4 Vector4::one(1, 1, 1, 1); + template + const Vector4 Vector4::zero(0, 0, 0, 0); } diff --git a/Eule/Vector4.h b/Eule/Vector4.h index c410b14..0f34b4f 100644 --- a/Eule/Vector4.h +++ b/Eule/Vector4.h @@ -5,22 +5,6 @@ #include #include "Matrix4x4.h" -#include "Math.h" -#include - -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif - -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ namespace Eule { template class Vector2; @@ -121,802 +105,4 @@ namespace Eule typedef Vector4 Vector4i; typedef Vector4 Vector4d; - - // Good, optimized chad version for doubles -template<> -double Vector4::SqrMagnitude() const -{ - return (x * x) + - (y * y) + - (z * z) + - (w * w); -} - -// Slow, lame version for intcels -template<> -double Vector4::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y + z*z + w*w; - return (double)iSqrMag; -} - -template -double Vector4::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - -template<> -Vector4 Vector4::VectorScale(const Vector4& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector4( - result[0], - result[1], - result[2], - result[3] - ); - - #else - - return Vector4( - x * scalar.x, - y * scalar.y, - z * scalar.z, - w * scalar.w - ); - #endif -} - - -template<> -Vector4 Vector4::VectorScale(const Vector4& scalar) const -{ - return Vector4( - x * scalar.x, - y * scalar.y, - z * scalar.z, - w * scalar.w - ); -} - - - -template -Vector4 Vector4::Normalize() const -{ - Vector4 norm(x, y, z, w); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector4d -template<> -void Vector4::NormalizeSelf() -{ - double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - z = 0; - w = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(w, z, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x /= length; - y /= length; - z /= length; - w /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector4::NormalizeSelf() -{ - x = 0; - y = 0; - z = 0; - w = 0; - - return; -} - - - -template -bool Vector4::Similar(const Vector4& other, double epsilon) const -{ - return - (::Eule::Math::Similar(x, other.x, epsilon)) && - (::Eule::Math::Similar(y, other.y, epsilon)) && - (::Eule::Math::Similar(z, other.z, epsilon)) && - (::Eule::Math::Similar(w, other.w, epsilon)) - ; -} - -template -Vector4 Vector4::ToInt() const -{ - return Vector4((int)x, (int)y, (int)z, (int)w); -} - -template -Vector4 Vector4::ToDouble() const -{ - return Vector4((double)x, (double)y, (double)z, (double)w); -} - -template -T& Vector4::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - case 3: - return w; - default: - throw std::out_of_range("Array descriptor on Vector4 out of range!"); - } -} - -template -const T& Vector4::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - case 3: - return w; - default: - throw std::out_of_range("Array descriptor on Vector4 out of range!"); - } -} - - - -// Good, optimized chad version for doubles -template<> -void Vector4::LerpSelf(const Vector4& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - w = sum[3]; - - #else - - x = it * x + t * other.x; - y = it * y + t * other.y; - z = it * z + t * other.z; - w = it * w + t * other.w; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector4::LerpSelf(const Vector4& other, double t) -{ - const double it = 1.0 - t; - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - z = (int)(it * (double)z + t * (double)other.z); - w = (int)(it * (double)w + t * (double)other.w); - - return; -} - -template<> -Vector4 Vector4::Lerp(const Vector4& other, double t) const -{ - Vector4d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector4 Vector4::Lerp(const Vector4& other, double t) const -{ - Vector4d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template<> -Vector4 Vector4::operator+(const Vector4& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector4( - sum[0], - sum[1], - sum[2], - sum[3] - ); - - #else - - return Vector4( - x + other.x, - y + other.y, - z + other.z, - w + other.w - ); - #endif -} - -template -Vector4 Vector4::operator+(const Vector4& other) const -{ - return Vector4( - x + other.x, - y + other.y, - z + other.z, - w + other.w - ); -} - - - -template<> -void Vector4::operator+=(const Vector4& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - w = sum[3]; - - #else - - x += other.x; - y += other.y; - z += other.z; - w += other.w; - - #endif - - return; -} - -template -void Vector4::operator+=(const Vector4& other) -{ - x += other.x; - y += other.y; - z += other.z; - w += other.w; - return; -} - - - -template<> -Vector4 Vector4::operator-(const Vector4& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector4( - diff[0], - diff[1], - diff[2], - diff[3] - ); - - #else - - return Vector4( - x - other.x, - y - other.y, - z - other.z, - w - other.w - ); - #endif -} - -template -Vector4 Vector4::operator-(const Vector4& other) const -{ - return Vector4( - x - other.x, - y - other.y, - z - other.z, - w - other.w - ); -} - - - -template<> -void Vector4::operator-=(const Vector4& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - z = diff[2]; - w = diff[3]; - - #else - - x -= other.x; - y -= other.y; - z -= other.z; - w -= other.w; - - #endif - - return; -} - -template -void Vector4::operator-=(const Vector4& other) -{ - x -= other.x; - y -= other.y; - z -= other.z; - w -= other.w; - return; -} - - - -template<> -Vector4 Vector4::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector4( - prod[0], - prod[1], - prod[2], - prod[3] - ); - - #else - - return Vector4( - x * scale, - y * scale, - z * scale, - w * scale - ); - - #endif -} - -template -Vector4 Vector4::operator*(const T scale) const -{ - return Vector4( - x * scale, - y * scale, - z * scale, - w * scale - ); -} - - - -template<> -void Vector4::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x *= scale; - y *= scale; - z *= scale; - w *= scale; - - #endif - - return; -} - -template -void Vector4::operator*=(const T scale) -{ - x *= scale; - y *= scale; - z *= scale; - w *= scale; - return; -} - - - -template<> -Vector4 Vector4::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector4( - prod[0], - prod[1], - prod[2], - prod[3] - ); - - #else - - return Vector4( - x / scale, - y / scale, - z / scale, - w / scale - ); - - #endif -} - -template -Vector4 Vector4::operator/(const T scale) const -{ - return Vector4( - x / scale, - y / scale, - z / scale, - w / scale - ); -} - - - -template<> -void Vector4::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x /= scale; - y /= scale; - z /= scale; - w /= scale; - - #endif - return; -} - -template -void Vector4::operator/=(const T scale) -{ - x /= scale; - y /= scale; - z /= scale; - w /= scale; - return; -} - - - -template -bool Vector4::operator==(const Vector4& other) const -{ - return - (x == other.x) && - (y == other.y) && - (z == other.z) && - (w == other.w); -} - - - -// Good, optimized chad version for doubles -template<> -Vector4 Vector4::operator*(const Matrix4x4& mat) const -{ - Vector4 newVec; - - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); - newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); - - return newVec; -} - -// Slow, lame version for intcels -template<> -Vector4 Vector4::operator*(const Matrix4x4& mat) const -{ - Vector4 newVec; - - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); - newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); - - return Vector4( - (int)newVec.x, - (int)newVec.y, - (int)newVec.z, - (int)newVec.w - ); -} - - - -// Good, optimized chad version for doubles -template<> -void Vector4::operator*=(const Matrix4x4& mat) -{ - Vector4 buffer = *this; - - // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] - // idk right now. check that if something doesn't work - x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w); - y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w); - z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w); - w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w); - - return; -} - -template -Vector4 Vector4::operator-() const -{ - return Vector4( - -x, - -y, - -z, - -w - ); -} - -template -void Vector4::operator=(const Vector4& other) -{ - x = other.x; - y = other.y; - z = other.z; - w = other.w; - - return; -} - -template -void Vector4::operator=(Vector4&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - z = std::move(other.z); - w = std::move(other.w); - - return; -} - -// Slow, lame version for intcels -template<> -void Vector4::operator*=(const Matrix4x4& mat) -{ - Vector4 buffer(x, y, z, w); - - // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] - // idk right now. check that if something doesn't work - x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w)); - y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w)); - z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w)); - w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w)); - - return; -} - -template -bool Vector4::operator!=(const Vector4& other) const -{ - return !operator==(other); -} - -template class Vector4; -template class Vector4; - -// Some handy predefines -template -const Vector4 Vector4::up(0, 1, 0, 0); -template -const Vector4 Vector4::down(0, -1, 0, 0); -template -const Vector4 Vector4::right(1, 0, 0, 0); -template -const Vector4 Vector4::left(-1, 0, 0, 0); -template -const Vector4 Vector4::forward(1, 0, 0, 0); -template -const Vector4 Vector4::backward(-1, 0, 0, 0); -template -const Vector4 Vector4::future(0, 0, 0, 1); -template -const Vector4 Vector4::past(0, 0, 0, -1); -template -const Vector4 Vector4::one(1, 1, 1, 1); -template -const Vector4 Vector4::zero(0, 0, 0, 0); - }