From fa825e91679eb620cb04976155408d92101e069f Mon Sep 17 00:00:00 2001 From: Leonetienne Date: Tue, 8 Feb 2022 10:47:31 +0100 Subject: [PATCH] Fix build warnings on macos --- Eule/Quaternion.cpp | 1 + Eule/Vector2.cpp | 711 +--------------------------------- Eule/Vector2.h | 702 +++++++++++++++++++++++++++++++++ Eule/Vector3.cpp | 917 +------------------------------------------- Eule/Vector3.h | 912 +++++++++++++++++++++++++++++++++++++++++++ Eule/Vector4.cpp | 822 +-------------------------------------- Eule/Vector4.h | 815 +++++++++++++++++++++++++++++++++++++++ Eule/version.h | 2 +- 8 files changed, 2441 insertions(+), 2441 deletions(-) diff --git a/Eule/Quaternion.cpp b/Eule/Quaternion.cpp index eff9938..50f4989 100644 --- a/Eule/Quaternion.cpp +++ b/Eule/Quaternion.cpp @@ -2,6 +2,7 @@ #include "Constants.h" #include #include +#include //#define _EULE_NO_INTRINSICS_ #ifndef _EULE_NO_INTRINSICS_ diff --git a/Eule/Vector2.cpp b/Eule/Vector2.cpp index 1bbd831..746613f 100644 --- a/Eule/Vector2.cpp +++ b/Eule/Vector2.cpp @@ -1,720 +1,15 @@ #include "Vector2.h" -#include "Math.h" -#include - -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif - -using namespace Eule; - -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ - -// Good, optimized chad version for doubles -template<> -double Vector2::DotProduct(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components into registers - __m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x); - __m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x); - - // Define bitmask, and execute computation - const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot - __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); - - // Retrieve result, and return it - float result[8]; - _mm256_storeu_ps(result, __dot); - - return result[0]; - - #else - return (x * other.x) + - (y * other.y); - #endif -} - -// Slow, lame version for intcels -template<> -double Vector2::DotProduct(const Vector2& other) const -{ - int iDot = (x * other.x) + - (y * other.y); - - return (double)iDot; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector2::CrossProduct(const Vector2& other) const -{ - return (x * other.y) - - (y * other.x); -} - -// Slow, lame version for intcels -template<> -double Vector2::CrossProduct(const Vector2& other) const -{ - int iCross = (x * other.y) - - (y * other.x); - - return (double)iCross; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector2::SqrMagnitude() const -{ - // x.DotProduct(x) == x.SqrMagnitude() - return DotProduct(*this); -} - -// Slow, lame version for intcels -template<> -double Vector2::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y; - return (double)iSqrMag; -} - -template -double Vector2::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - -template<> -Vector2 Vector2::VectorScale(const Vector2& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector2( - result[0], - result[1] - ); - - #else - - return Vector2( - x * scalar.x, - y * scalar.y - ); - #endif -} - -template<> -Vector2 Vector2::VectorScale(const Vector2& scalar) const -{ - return Vector2( - x * scalar.x, - y * scalar.y - ); -} - - -template -Vector2 Vector2::Normalize() const -{ - Vector2 norm(x, y); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector2d -template<> -void Vector2::NormalizeSelf() -{ - double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(0, 0, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x /= length; - y /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector2::NormalizeSelf() -{ - std::cerr << "Stop normalizing int-vectors!!" << std::endl; - x = 0; - y = 0; - - return; -} - - -// Good, optimized chad version for doubles -template<> -void Vector2::LerpSelf(const Vector2& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - - #else - - x = it * x + t * other.x; - y = it * y + t * other.y; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector2::LerpSelf(const Vector2& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - - return; -} - -template<> -Vector2 Vector2::Lerp(const Vector2& other, double t) const -{ - Vector2d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector2 Vector2::Lerp(const Vector2& other, double t) const -{ - Vector2d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template -T& Vector2::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - default: - throw std::out_of_range("Array descriptor on Vector2 out of range!"); - } -} - -template -const T& Vector2::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - default: - throw std::out_of_range("Array descriptor on Vector2 out of range!"); - } -} - -template -bool Vector2::Similar(const Vector2& other, double epsilon) const -{ - return - (::Math::Similar(x, other.x, epsilon)) && - (::Math::Similar(y, other.y, epsilon)) - ; -} - -template -Vector2 Vector2::ToInt() const -{ - return Vector2((int)x, (int)y); -} - -template -Vector2 Vector2::ToDouble() const -{ - return Vector2((double)x, (double)y); -} - -template<> -Vector2 Vector2::operator+(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector2( - sum[0], - sum[1] - ); - - #else - - return Vector2( - x + other.x, - y + other.y - ); - #endif -} - -template -Vector2 Vector2::operator+(const Vector2& other) const -{ - return Vector2( - x + other.x, - y + other.y - ); -} - - - -template<> -void Vector2::operator+=(const Vector2& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - - #else - - x += other.x; - y += other.y; - - #endif - - return; -} - -template -void Vector2::operator+=(const Vector2& other) -{ - x += other.x; - y += other.y; - return; -} - - - -template<> -Vector2 Vector2::operator-(const Vector2& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector2( - diff[0], - diff[1] - ); - - #else - - return Vector2( - x - other.x, - y - other.y - ); - #endif -} - -template -Vector2 Vector2::operator-(const Vector2& other) const -{ - return Vector2( - x - other.x, - y - other.y - ); -} - - - -template<> -void Vector2::operator-=(const Vector2& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - - #else - - x -= other.x; - y -= other.y; - - #endif - - return; -} - -template -void Vector2::operator-=(const Vector2& other) -{ - x -= other.x; - y -= other.y; - return; -} - - - -template<> -Vector2 Vector2::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector2( - prod[0], - prod[1] - ); - - #else - - return Vector2( - x * scale, - y * scale - ); - - #endif -} - -template -Vector2 Vector2::operator*(const T scale) const -{ - return Vector2( - x * scale, - y * scale - ); -} - - - -template<> -void Vector2::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x *= scale; - y *= scale; - - #endif - - return; -} - -template -void Vector2::operator*=(const T scale) -{ - x *= scale; - y *= scale; - return; -} - - - -template<> -Vector2 Vector2::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector2( - prod[0], - prod[1] - ); - - #else - - return Vector2( - x / scale, - y / scale - ); - - #endif -} - -template -Vector2 Vector2::operator/(const T scale) const -{ - return Vector2( - x / scale, - y / scale - ); -} - - - -template<> -void Vector2::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, 0, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - - #else - - x /= scale; - y /= scale; - - #endif - return; -} - -template -void Vector2::operator/=(const T scale) -{ - x /= scale; - y /= scale; - return; -} - - - -template -void Vector2::operator=(const Vector2& other) -{ - x = other.x; - y = other.y; - - return; -} - -template -void Vector2::operator=(Vector2&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - - return; -} - -template -bool Vector2::operator==(const Vector2& other) const -{ - return - (x == other.x) && - (y == other.y); -} - -template -bool Vector2::operator!=(const Vector2& other) const -{ - return !operator==(other); -} - -template -Vector2 Vector2::operator-() const -{ - return Vector2( - -x, - -y - ); -} - -// Don't want these includes above the other stuff #include "Vector3.h" #include "Vector4.h" + template -Vector2::operator Vector3() const +Eule::Vector2::operator Eule::Vector3() const { return Vector3(x, y, 0); } template -Vector2::operator Vector4() const +Eule::Vector2::operator Eule::Vector4() const { return Vector4(x, y, 0, 0); } - -template class Vector2; -template class Vector2; - -// Some handy predefines -template -const Vector2 Vector2::up(0, 1); -template -const Vector2 Vector2::down(0, -1); -template -const Vector2 Vector2::right(1, 0); -template -const Vector2 Vector2::left(-1, 0); -template -const Vector2 Vector2::one(1, 1); -template -const Vector2 Vector2::zero(0, 0); diff --git a/Eule/Vector2.h b/Eule/Vector2.h index de2c6fa..f46b46a 100644 --- a/Eule/Vector2.h +++ b/Eule/Vector2.h @@ -1,6 +1,22 @@ #pragma once #include #include +#include "Math.h" +#include + +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ namespace Eule { @@ -100,4 +116,690 @@ namespace Eule typedef Vector2 Vector2i; typedef Vector2 Vector2d; + +// Good, optimized chad version for doubles +template<> +double Vector2::DotProduct(const Vector2& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components into registers + __m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x); + __m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x); + + // Define bitmask, and execute computation + const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot + __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + + // Retrieve result, and return it + float result[8]; + _mm256_storeu_ps(result, __dot); + + return result[0]; + + #else + return (x * other.x) + + (y * other.y); + #endif +} + +// Slow, lame version for intcels +template<> +double Vector2::DotProduct(const Vector2& other) const +{ + int iDot = (x * other.x) + + (y * other.y); + + return (double)iDot; +} + + + +// Good, optimized chad version for doubles +template<> +double Vector2::CrossProduct(const Vector2& other) const +{ + return (x * other.y) - + (y * other.x); +} + +// Slow, lame version for intcels +template<> +double Vector2::CrossProduct(const Vector2& other) const +{ + int iCross = (x * other.y) - + (y * other.x); + + return (double)iCross; +} + + + +// Good, optimized chad version for doubles +template<> +double Vector2::SqrMagnitude() const +{ + // x.DotProduct(x) == x.SqrMagnitude() + return DotProduct(*this); +} + +// Slow, lame version for intcels +template<> +double Vector2::SqrMagnitude() const +{ + int iSqrMag = x*x + y*y; + return (double)iSqrMag; +} + +template +double Vector2::Magnitude() const +{ + return sqrt(SqrMagnitude()); +} + + +template<> +Vector2 Vector2::VectorScale(const Vector2& scalar) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector2( + result[0], + result[1] + ); + + #else + + return Vector2( + x * scalar.x, + y * scalar.y + ); + #endif +} + +template<> +Vector2 Vector2::VectorScale(const Vector2& scalar) const +{ + return Vector2( + x * scalar.x, + y * scalar.y + ); +} + + +template +Vector2 Vector2::Normalize() const +{ + Vector2 norm(x, y); + norm.NormalizeSelf(); + + return norm; +} + +// Method to normalize a Vector2d +template<> +void Vector2::NormalizeSelf() +{ + double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + } + else + { + #ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(0, 0, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + + #else + + x /= length; + y /= length; + + #endif + } + + return; +} + +// You can't normalize an int vector, ffs! +// But we need an implementation for T=int +template<> +void Vector2::NormalizeSelf() +{ + std::cerr << "Stop normalizing int-vectors!!" << std::endl; + x = 0; + y = 0; + + return; +} + + +// Good, optimized chad version for doubles +template<> +void Vector2::LerpSelf(const Vector2& other, double t) +{ + const double it = 1.0 - t; // Inverse t + + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + + #else + + x = it * x + t * other.x; + y = it * y + t * other.y; + + #endif + + return; +} + + + +// Slow, lame version for intcels +template<> +void Vector2::LerpSelf(const Vector2& other, double t) +{ + const double it = 1.0 - t; // Inverse t + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + + return; +} + +template<> +Vector2 Vector2::Lerp(const Vector2& other, double t) const +{ + Vector2d copy(*this); + copy.LerpSelf(other, t); + + return copy; +} + +template<> +Vector2 Vector2::Lerp(const Vector2& other, double t) const +{ + Vector2d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; +} + + + +template +T& Vector2::operator[](std::size_t idx) +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + default: + throw std::out_of_range("Array descriptor on Vector2 out of range!"); + } +} + +template +const T& Vector2::operator[](std::size_t idx) const +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + default: + throw std::out_of_range("Array descriptor on Vector2 out of range!"); + } +} + +template +bool Vector2::Similar(const Vector2& other, double epsilon) const +{ + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) + ; +} + +template +Vector2 Vector2::ToInt() const +{ + return Vector2((int)x, (int)y); +} + +template +Vector2 Vector2::ToDouble() const +{ + return Vector2((double)x, (double)y); +} + +template<> +Vector2 Vector2::operator+(const Vector2& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector2( + sum[0], + sum[1] + ); + + #else + + return Vector2( + x + other.x, + y + other.y + ); + #endif +} + +template +Vector2 Vector2::operator+(const Vector2& other) const +{ + return Vector2( + x + other.x, + y + other.y + ); +} + + + +template<> +void Vector2::operator+=(const Vector2& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + + #else + + x += other.x; + y += other.y; + + #endif + + return; +} + +template +void Vector2::operator+=(const Vector2& other) +{ + x += other.x; + y += other.y; + return; +} + + + +template<> +Vector2 Vector2::operator-(const Vector2& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector2( + diff[0], + diff[1] + ); + + #else + + return Vector2( + x - other.x, + y - other.y + ); + #endif +} + +template +Vector2 Vector2::operator-(const Vector2& other) const +{ + return Vector2( + x - other.x, + y - other.y + ); +} + + + +template<> +void Vector2::operator-=(const Vector2& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + + #else + + x -= other.x; + y -= other.y; + + #endif + + return; +} + +template +void Vector2::operator-=(const Vector2& other) +{ + x -= other.x; + y -= other.y; + return; +} + + + +template<> +Vector2 Vector2::operator*(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector2( + prod[0], + prod[1] + ); + + #else + + return Vector2( + x * scale, + y * scale + ); + + #endif +} + +template +Vector2 Vector2::operator*(const T scale) const +{ + return Vector2( + x * scale, + y * scale + ); +} + + + +template<> +void Vector2::operator*=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + + #else + + x *= scale; + y *= scale; + + #endif + + return; +} + +template +void Vector2::operator*=(const T scale) +{ + x *= scale; + y *= scale; + return; +} + + + +template<> +Vector2 Vector2::operator/(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector2( + prod[0], + prod[1] + ); + + #else + + return Vector2( + x / scale, + y / scale + ); + + #endif +} + +template +Vector2 Vector2::operator/(const T scale) const +{ + return Vector2( + x / scale, + y / scale + ); +} + + + +template<> +void Vector2::operator/=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, 0, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + + #else + + x /= scale; + y /= scale; + + #endif + return; +} + +template +void Vector2::operator/=(const T scale) +{ + x /= scale; + y /= scale; + return; +} + + + +template +void Vector2::operator=(const Vector2& other) +{ + x = other.x; + y = other.y; + + return; +} + +template +void Vector2::operator=(Vector2&& other) noexcept +{ + x = std::move(other.x); + y = std::move(other.y); + + return; +} + +template +bool Vector2::operator==(const Vector2& other) const +{ + return + (x == other.x) && + (y == other.y); +} + +template +bool Vector2::operator!=(const Vector2& other) const +{ + return !operator==(other); +} + +template +Vector2 Vector2::operator-() const +{ + return Vector2( + -x, + -y + ); +} + +template class Vector2; +template class Vector2; + +// Some handy predefines +template +const Vector2 Vector2::up(0, 1); +template +const Vector2 Vector2::down(0, -1); +template +const Vector2 Vector2::right(1, 0); +template +const Vector2 Vector2::left(-1, 0); +template +const Vector2 Vector2::one(1, 1); +template +const Vector2 Vector2::zero(0, 0); } diff --git a/Eule/Vector3.cpp b/Eule/Vector3.cpp index c8d71ef..fd47e26 100644 --- a/Eule/Vector3.cpp +++ b/Eule/Vector3.cpp @@ -1,928 +1,19 @@ #include "Vector3.h" -#include "Math.h" -#include -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif - -using namespace Eule; - -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ - -// Good, optimized chad version for doubles -template<> -double Vector3::DotProduct(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components into registers - __m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x); - __m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x); - - // Define bitmask, and execute computation - const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot - __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); - - // Retrieve result, and return it - float result[8]; - _mm256_storeu_ps(result, __dot); - - return result[0]; - - #else - return (x * other.x) + - (y * other.y) + - (z * other.z); - #endif -} - -// Slow, lame version for intcels -template<> -double Vector3::DotProduct(const Vector3& other) const -{ - int iDot = (x * other.x) + (y * other.y) + (z * other.z); - return (double)iDot; -} - - - -// Good, optimized chad version for doubles -template<> -Vector3 Vector3::CrossProduct(const Vector3& other) const -{ - Vector3 cp; - cp.x = (y * other.z) - (z * other.y); - cp.y = (z * other.x) - (x * other.z); - cp.z = (x * other.y) - (y * other.x); - - return cp; -} - -// Slow, lame version for intcels -template<> -Vector3 Vector3::CrossProduct(const Vector3& other) const -{ - Vector3 cp; - cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y); - cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z); - cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x); - - return cp; -} - - - -// Good, optimized chad version for doubles -template<> -double Vector3::SqrMagnitude() const -{ - // x.DotProduct(x) == x.SqrMagnitude() - return DotProduct(*this); -} - -// Slow, lame version for intcels -template<> -double Vector3::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y + z*z; - return (double)iSqrMag; -} - -template -double Vector3::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - - -template<> -Vector3 Vector3::VectorScale(const Vector3& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector3( - result[0], - result[1], - result[2] - ); - - #else - - return Vector3( - x * scalar.x, - y * scalar.y, - z * scalar.z - ); - - #endif -} - -template<> -Vector3 Vector3::VectorScale(const Vector3& scalar) const -{ - return Vector3( - x * scalar.x, - y * scalar.y, - z * scalar.z - ); -} - - - -template -Vector3 Vector3::Normalize() const -{ - Vector3 norm(x, y, z); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector3d -template<> -void Vector3::NormalizeSelf() -{ - const double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - z = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(0, z, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x /= length; - y /= length; - z /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector3::NormalizeSelf() -{ - std::cerr << "Stop normalizing int-vectors!!" << std::endl; - x = 0; - y = 0; - z = 0; - - return; -} - - - -template -bool Vector3::Similar(const Vector3& other, double epsilon) const -{ - return - (::Math::Similar(x, other.x, epsilon)) && - (::Math::Similar(y, other.y, epsilon)) && - (::Math::Similar(z, other.z, epsilon)) - ; -} - -template -Vector3 Vector3::ToInt() const -{ - return Vector3((int)x, (int)y, (int)z); -} - -template -Vector3 Vector3::ToDouble() const -{ - return Vector3((double)x, (double)y, (double)z); -} - -template -T& Vector3::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - default: - throw std::out_of_range("Array descriptor on Vector3 out of range!"); - } -} - -template -const T& Vector3::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - default: - throw std::out_of_range("Array descriptor on Vector3 out of range!"); - } -} - - - -// Good, optimized chad version for doubles -template<> -void Vector3::LerpSelf(const Vector3& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - - #else - - x = it*x + t*other.x; - y = it*y + t*other.y; - z = it*z + t*other.z; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector3::LerpSelf(const Vector3& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - z = (int)(it * (double)z + t * (double)other.z); - - return; -} - -template<> -Vector3 Vector3::Lerp(const Vector3& other, double t) const -{ - Vector3d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector3 Vector3::Lerp(const Vector3& other, double t) const -{ - Vector3d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template<> -Vector3 Vector3::operator+(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector3( - sum[0], - sum[1], - sum[2] - ); - - #else - - return Vector3( - x + other.x, - y + other.y, - z + other.z - ); - #endif -} - -template -Vector3 Vector3::operator+(const Vector3& other) const -{ - return Vector3( - x + other.x, - y + other.y, - z + other.z - ); -} - - - -template<> -void Vector3::operator+=(const Vector3& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - - #else - - x += other.x; - y += other.y; - z += other.z; - - #endif - - return; -} - -template -void Vector3::operator+=(const Vector3& other) -{ - x += other.x; - y += other.y; - z += other.z; - return; -} - - - -template<> -Vector3 Vector3::operator-(const Vector3& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector3( - diff[0], - diff[1], - diff[2] - ); - - #else - - return Vector3( - x - other.x, - y - other.y, - z - other.z - ); - #endif -} - -template -Vector3 Vector3::operator-(const Vector3& other) const -{ - return Vector3( - x - other.x, - y - other.y, - z - other.z - ); -} - - - -template<> -void Vector3::operator-=(const Vector3& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - z = diff[2]; - - #else - - x -= other.x; - y -= other.y; - z -= other.z; - - #endif - - return; -} - -template -void Vector3::operator-=(const Vector3& other) -{ - x -= other.x; - y -= other.y; - z -= other.z; - return; -} - - - -template<> -Vector3 Vector3::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector3( - prod[0], - prod[1], - prod[2] - ); - - #else - - return Vector3( - x * scale, - y * scale, - z * scale - ); - - #endif -} - -template -Vector3 Vector3::operator*(const T scale) const -{ - return Vector3( - x * scale, - y * scale, - z * scale - ); -} - - - -template<> -void Vector3::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x *= scale; - y *= scale; - z *= scale; - - #endif - - return; -} - -template -void Vector3::operator*=(const T scale) -{ - x *= scale; - y *= scale; - z *= scale; - return; -} - - - -template<> -Vector3 Vector3::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector3( - prod[0], - prod[1], - prod[2] - ); - - #else - - return Vector3( - x / scale, - y / scale, - z / scale - ); - - #endif -} - -template -Vector3 Vector3::operator/(const T scale) const -{ - return Vector3( - x / scale, - y / scale, - z / scale - ); -} - - - -template<> -void Vector3::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(0, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - - #else - - x /= scale; - y /= scale; - z /= scale; - - #endif - return; -} - -template -void Vector3::operator/=(const T scale) -{ - x /= scale; - y /= scale; - z /= scale; - return; -} - - -// Good, optimized chad version for doubles -template<> -Vector3 Vector3::operator*(const Matrix4x4& mat) const -{ - Vector3 newVec; - - #ifndef _EULE_NO_INTRINSICS_ - // Store x, y, and z values - __m256d __vecx = _mm256_set1_pd(x); - __m256d __vecy = _mm256_set1_pd(y); - __m256d __vecz = _mm256_set1_pd(z); - - // Store matrix values - __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); - __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); - __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); - - // Multiply x, y, z and matrix values - __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); - __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); - __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); - - // Sum up the products - __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); - - // Store translation values - __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); - - // Add the translation values - __m256d __final = _mm256_add_pd(__sum, __translation); - - double dfinal[4]; - - _mm256_storeu_pd(dfinal, __final); - - newVec.x = dfinal[3]; - newVec.y = dfinal[2]; - newVec.z = dfinal[1]; - - #else - // Rotation, Scaling - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); - - // Translation - newVec.x += mat[0][3]; - newVec.y += mat[1][3]; - newVec.z += mat[2][3]; - #endif - - return newVec; -} - -// Slow, lame version for intcels -template<> -Vector3 Vector3::operator*(const Matrix4x4& mat) const -{ - Vector3 newVec; - - // Rotation, Scaling - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); - - // Translation - newVec.x += mat[0][3]; - newVec.y += mat[1][3]; - newVec.z += mat[2][3]; - - return Vector3( - (int)newVec.x, - (int)newVec.y, - (int)newVec.z - ); -} - - - -// Good, optimized chad version for doubles -template<> -void Vector3::operator*=(const Matrix4x4& mat) -{ - #ifndef _EULE_NO_INTRINSICS_ - // Store x, y, and z values - __m256d __vecx = _mm256_set1_pd(x); - __m256d __vecy = _mm256_set1_pd(y); - __m256d __vecz = _mm256_set1_pd(z); - - // Store matrix values - __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); - __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); - __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); - - // Multiply x, y, z and matrix values - __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); - __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); - __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); - - // Sum up the products - __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); - - // Store translation values - __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); - - // Add the translation values - __m256d __final = _mm256_add_pd(__sum, __translation); - - double dfinal[4]; - - _mm256_storeu_pd(dfinal, __final); - - x = dfinal[3]; - y = dfinal[2]; - z = dfinal[1]; - - #else - Vector3 buffer = *this; - x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z); - y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z); - z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z); - - // Translation - x += mat[0][3]; - y += mat[1][3]; - z += mat[2][3]; - #endif - - return; -} - -template -Vector3 Vector3::operator-() const -{ - return Vector3( - -x, - -y, - -z - ); -} - -template -void Vector3::operator=(const Vector3& other) -{ - x = other.x; - y = other.y; - z = other.z; - - return; -} - -template -void Vector3::operator=(Vector3&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - z = std::move(other.z); - - return; -} - -// Slow, lame version for intcels -template<> -void Vector3::operator*=(const Matrix4x4& mat) -{ - Vector3 buffer(x, y, z); - - x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z)); - y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z)); - z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z)); - - // Translation - x += (int)mat[0][3]; - y += (int)mat[1][3]; - z += (int)mat[2][3]; - - return; -} - - - -template -bool Vector3::operator==(const Vector3& other) const -{ - return - (x == other.x) && - (y == other.y) && - (z == other.z); -} - -template -bool Vector3::operator!=(const Vector3& other) const -{ - return !operator==(other); -} +#include "Vector3.h" #include "Vector2.h" #include "Vector4.h" + template -Vector3::operator Vector2() const +Eule::Vector3::operator Eule::Vector2() const { return Vector2(x, y); } template -Vector3::operator Vector4() const +Eule::Vector3::operator Eule::Vector4() const { return Vector4(x, y, z, 0); } - -template class Vector3; -template class Vector3; - -// Some handy predefines -template -const Vector3 Vector3::up(0, 1, 0); -template -const Vector3 Vector3::down(0, -1, 0); -template -const Vector3 Vector3::right(1, 0, 0); -template -const Vector3 Vector3::left(-1, 0, 0); -template -const Vector3 Vector3::forward(0, 0, 1); -template -const Vector3 Vector3::backward(0, 0, -1); -template -const Vector3 Vector3::one(1, 1, 1); -template -const Vector3 Vector3::zero(0, 0, 0); diff --git a/Eule/Vector3.h b/Eule/Vector3.h index 0d4b34a..d9284b9 100644 --- a/Eule/Vector3.h +++ b/Eule/Vector3.h @@ -5,6 +5,23 @@ #include #include "Matrix4x4.h" +#include "Math.h" +#include + +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ + namespace Eule { template class Vector2; @@ -108,4 +125,899 @@ namespace Eule typedef Vector3 Vector3i; typedef Vector3 Vector3d; + + // Good, optimized chad version for doubles +template<> +double Vector3::DotProduct(const Vector3& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components into registers + __m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x); + __m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x); + + // Define bitmask, and execute computation + const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot + __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); + + // Retrieve result, and return it + float result[8]; + _mm256_storeu_ps(result, __dot); + + return result[0]; + + #else + return (x * other.x) + + (y * other.y) + + (z * other.z); + #endif +} + +// Slow, lame version for intcels +template<> +double Vector3::DotProduct(const Vector3& other) const +{ + int iDot = (x * other.x) + (y * other.y) + (z * other.z); + return (double)iDot; +} + + + +// Good, optimized chad version for doubles +template<> +Vector3 Vector3::CrossProduct(const Vector3& other) const +{ + Vector3 cp; + cp.x = (y * other.z) - (z * other.y); + cp.y = (z * other.x) - (x * other.z); + cp.z = (x * other.y) - (y * other.x); + + return cp; +} + +// Slow, lame version for intcels +template<> +Vector3 Vector3::CrossProduct(const Vector3& other) const +{ + Vector3 cp; + cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y); + cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z); + cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x); + + return cp; +} + + + +// Good, optimized chad version for doubles +template<> +double Vector3::SqrMagnitude() const +{ + // x.DotProduct(x) == x.SqrMagnitude() + return DotProduct(*this); +} + +// Slow, lame version for intcels +template<> +double Vector3::SqrMagnitude() const +{ + int iSqrMag = x*x + y*y + z*z; + return (double)iSqrMag; +} + +template +double Vector3::Magnitude() const +{ + return sqrt(SqrMagnitude()); +} + + + +template<> +Vector3 Vector3::VectorScale(const Vector3& scalar) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector3( + result[0], + result[1], + result[2] + ); + + #else + + return Vector3( + x * scalar.x, + y * scalar.y, + z * scalar.z + ); + + #endif +} + +template<> +Vector3 Vector3::VectorScale(const Vector3& scalar) const +{ + return Vector3( + x * scalar.x, + y * scalar.y, + z * scalar.z + ); +} + + + +template +Vector3 Vector3::Normalize() const +{ + Vector3 norm(x, y, z); + norm.NormalizeSelf(); + + return norm; +} + +// Method to normalize a Vector3d +template<> +void Vector3::NormalizeSelf() +{ + const double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + z = 0; + } + else + { + #ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(0, z, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + + #else + + x /= length; + y /= length; + z /= length; + + #endif + } + + return; +} + +// You can't normalize an int vector, ffs! +// But we need an implementation for T=int +template<> +void Vector3::NormalizeSelf() +{ + std::cerr << "Stop normalizing int-vectors!!" << std::endl; + x = 0; + y = 0; + z = 0; + + return; +} + + + +template +bool Vector3::Similar(const Vector3& other, double epsilon) const +{ + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) && + (::Eule::Math::Similar(z, other.z, epsilon)) + ; +} + +template +Vector3 Vector3::ToInt() const +{ + return Vector3((int)x, (int)y, (int)z); +} + +template +Vector3 Vector3::ToDouble() const +{ + return Vector3((double)x, (double)y, (double)z); +} + +template +T& Vector3::operator[](std::size_t idx) +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + default: + throw std::out_of_range("Array descriptor on Vector3 out of range!"); + } +} + +template +const T& Vector3::operator[](std::size_t idx) const +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + default: + throw std::out_of_range("Array descriptor on Vector3 out of range!"); + } +} + + + +// Good, optimized chad version for doubles +template<> +void Vector3::LerpSelf(const Vector3& other, double t) +{ + const double it = 1.0 - t; // Inverse t + + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + + #else + + x = it*x + t*other.x; + y = it*y + t*other.y; + z = it*z + t*other.z; + + #endif + + return; +} + + + +// Slow, lame version for intcels +template<> +void Vector3::LerpSelf(const Vector3& other, double t) +{ + const double it = 1.0 - t; // Inverse t + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + z = (int)(it * (double)z + t * (double)other.z); + + return; +} + +template<> +Vector3 Vector3::Lerp(const Vector3& other, double t) const +{ + Vector3d copy(*this); + copy.LerpSelf(other, t); + + return copy; +} + +template<> +Vector3 Vector3::Lerp(const Vector3& other, double t) const +{ + Vector3d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; +} + + + +template<> +Vector3 Vector3::operator+(const Vector3& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector3( + sum[0], + sum[1], + sum[2] + ); + + #else + + return Vector3( + x + other.x, + y + other.y, + z + other.z + ); + #endif +} + +template +Vector3 Vector3::operator+(const Vector3& other) const +{ + return Vector3( + x + other.x, + y + other.y, + z + other.z + ); +} + + + +template<> +void Vector3::operator+=(const Vector3& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + + #else + + x += other.x; + y += other.y; + z += other.z; + + #endif + + return; +} + +template +void Vector3::operator+=(const Vector3& other) +{ + x += other.x; + y += other.y; + z += other.z; + return; +} + + + +template<> +Vector3 Vector3::operator-(const Vector3& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector3( + diff[0], + diff[1], + diff[2] + ); + + #else + + return Vector3( + x - other.x, + y - other.y, + z - other.z + ); + #endif +} + +template +Vector3 Vector3::operator-(const Vector3& other) const +{ + return Vector3( + x - other.x, + y - other.y, + z - other.z + ); +} + + + +template<> +void Vector3::operator-=(const Vector3& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + z = diff[2]; + + #else + + x -= other.x; + y -= other.y; + z -= other.z; + + #endif + + return; +} + +template +void Vector3::operator-=(const Vector3& other) +{ + x -= other.x; + y -= other.y; + z -= other.z; + return; +} + + + +template<> +Vector3 Vector3::operator*(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector3( + prod[0], + prod[1], + prod[2] + ); + + #else + + return Vector3( + x * scale, + y * scale, + z * scale + ); + + #endif +} + +template +Vector3 Vector3::operator*(const T scale) const +{ + return Vector3( + x * scale, + y * scale, + z * scale + ); +} + + + +template<> +void Vector3::operator*=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + + #else + + x *= scale; + y *= scale; + z *= scale; + + #endif + + return; +} + +template +void Vector3::operator*=(const T scale) +{ + x *= scale; + y *= scale; + z *= scale; + return; +} + + + +template<> +Vector3 Vector3::operator/(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector3( + prod[0], + prod[1], + prod[2] + ); + + #else + + return Vector3( + x / scale, + y / scale, + z / scale + ); + + #endif +} + +template +Vector3 Vector3::operator/(const T scale) const +{ + return Vector3( + x / scale, + y / scale, + z / scale + ); +} + + + +template<> +void Vector3::operator/=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(0, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + + #else + + x /= scale; + y /= scale; + z /= scale; + + #endif + return; +} + +template +void Vector3::operator/=(const T scale) +{ + x /= scale; + y /= scale; + z /= scale; + return; +} + + +// Good, optimized chad version for doubles +template<> +Vector3 Vector3::operator*(const Matrix4x4& mat) const +{ + Vector3 newVec; + + #ifndef _EULE_NO_INTRINSICS_ + // Store x, y, and z values + __m256d __vecx = _mm256_set1_pd(x); + __m256d __vecy = _mm256_set1_pd(y); + __m256d __vecz = _mm256_set1_pd(z); + + // Store matrix values + __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); + __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); + __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); + + // Multiply x, y, z and matrix values + __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); + __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); + __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); + + // Sum up the products + __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); + + // Store translation values + __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); + + // Add the translation values + __m256d __final = _mm256_add_pd(__sum, __translation); + + double dfinal[4]; + + _mm256_storeu_pd(dfinal, __final); + + newVec.x = dfinal[3]; + newVec.y = dfinal[2]; + newVec.z = dfinal[1]; + + #else + // Rotation, Scaling + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); + + // Translation + newVec.x += mat[0][3]; + newVec.y += mat[1][3]; + newVec.z += mat[2][3]; + #endif + + return newVec; +} + +// Slow, lame version for intcels +template<> +Vector3 Vector3::operator*(const Matrix4x4& mat) const +{ + Vector3 newVec; + + // Rotation, Scaling + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); + + // Translation + newVec.x += mat[0][3]; + newVec.y += mat[1][3]; + newVec.z += mat[2][3]; + + return Vector3( + (int)newVec.x, + (int)newVec.y, + (int)newVec.z + ); +} + + + +// Good, optimized chad version for doubles +template<> +void Vector3::operator*=(const Matrix4x4& mat) +{ + #ifndef _EULE_NO_INTRINSICS_ + // Store x, y, and z values + __m256d __vecx = _mm256_set1_pd(x); + __m256d __vecy = _mm256_set1_pd(y); + __m256d __vecz = _mm256_set1_pd(z); + + // Store matrix values + __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); + __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); + __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); + + // Multiply x, y, z and matrix values + __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); + __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); + __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); + + // Sum up the products + __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); + + // Store translation values + __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); + + // Add the translation values + __m256d __final = _mm256_add_pd(__sum, __translation); + + double dfinal[4]; + + _mm256_storeu_pd(dfinal, __final); + + x = dfinal[3]; + y = dfinal[2]; + z = dfinal[1]; + + #else + Vector3 buffer = *this; + x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z); + y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z); + z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z); + + // Translation + x += mat[0][3]; + y += mat[1][3]; + z += mat[2][3]; + #endif + + return; +} + +template +Vector3 Vector3::operator-() const +{ + return Vector3( + -x, + -y, + -z + ); +} + +template +void Vector3::operator=(const Vector3& other) +{ + x = other.x; + y = other.y; + z = other.z; + + return; +} + +template +void Vector3::operator=(Vector3&& other) noexcept +{ + x = std::move(other.x); + y = std::move(other.y); + z = std::move(other.z); + + return; +} + +// Slow, lame version for intcels +template<> +void Vector3::operator*=(const Matrix4x4& mat) +{ + Vector3 buffer(x, y, z); + + x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z)); + y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z)); + z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z)); + + // Translation + x += (int)mat[0][3]; + y += (int)mat[1][3]; + z += (int)mat[2][3]; + + return; +} + + + +template +bool Vector3::operator==(const Vector3& other) const +{ + return + (x == other.x) && + (y == other.y) && + (z == other.z); +} + +template +bool Vector3::operator!=(const Vector3& other) const +{ + return !operator==(other); +} + +template class Vector3; +template class Vector3; + +// Some handy predefines +template +const Vector3 Vector3::up(0, 1, 0); +template +const Vector3 Vector3::down(0, -1, 0); +template +const Vector3 Vector3::right(1, 0, 0); +template +const Vector3 Vector3::left(-1, 0, 0); +template +const Vector3 Vector3::forward(0, 0, 1); +template +const Vector3 Vector3::backward(0, 0, -1); +template +const Vector3 Vector3::one(1, 1, 1); +template +const Vector3 Vector3::zero(0, 0, 0); + } diff --git a/Eule/Vector4.cpp b/Eule/Vector4.cpp index 2c4a75c..0810f7f 100644 --- a/Eule/Vector4.cpp +++ b/Eule/Vector4.cpp @@ -1,831 +1,15 @@ #include "Vector4.h" -#include "Math.h" -#include - -//#define _EULE_NO_INTRINSICS_ -#ifndef _EULE_NO_INTRINSICS_ -#include -#endif - -using namespace Eule; - -/* - NOTE: - Here you will find bad, unoptimized methods for T=int. - This is because the compiler needs a method for each type in each instantiation of the template! - I can't generalize the methods when heavily optimizing for doubles. - These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. - The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. -*/ - -// Good, optimized chad version for doubles -template<> -double Vector4::SqrMagnitude() const -{ - return (x * x) + - (y * y) + - (z * z) + - (w * w); -} - -// Slow, lame version for intcels -template<> -double Vector4::SqrMagnitude() const -{ - int iSqrMag = x*x + y*y + z*z + w*w; - return (double)iSqrMag; -} - -template -double Vector4::Magnitude() const -{ - return sqrt(SqrMagnitude()); -} - - -template<> -Vector4 Vector4::VectorScale(const Vector4& scalar) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Load vectors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x); - - // Multiply them - __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); - - // Retrieve result - double result[4]; - _mm256_storeu_pd(result, __product); - - // Return value - return Vector4( - result[0], - result[1], - result[2], - result[3] - ); - - #else - - return Vector4( - x * scalar.x, - y * scalar.y, - z * scalar.z, - w * scalar.w - ); - #endif -} - - -template<> -Vector4 Vector4::VectorScale(const Vector4& scalar) const -{ - return Vector4( - x * scalar.x, - y * scalar.y, - z * scalar.z, - w * scalar.w - ); -} - - - -template -Vector4 Vector4::Normalize() const -{ - Vector4 norm(x, y, z, w); - norm.NormalizeSelf(); - - return norm; -} - -// Method to normalize a Vector4d -template<> -void Vector4::NormalizeSelf() -{ - double length = Magnitude(); - - // Prevent division by 0 - if (length == 0) - { - x = 0; - y = 0; - z = 0; - w = 0; - } - else - { - #ifndef _EULE_NO_INTRINSICS_ - - // Load vector and length into registers - __m256d __vec = _mm256_set_pd(w, z, y, x); - __m256d __len = _mm256_set1_pd(length); - - // Divide - __m256d __prod = _mm256_div_pd(__vec, __len); - - // Extract and set values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x /= length; - y /= length; - z /= length; - w /= length; - - #endif - } - - return; -} - -// You can't normalize an int vector, ffs! -// But we need an implementation for T=int -template<> -void Vector4::NormalizeSelf() -{ - std::cerr << "Stop normalizing int-vectors!!" << std::endl; - x = 0; - y = 0; - z = 0; - w = 0; - - return; -} - - - -template -bool Vector4::Similar(const Vector4& other, double epsilon) const -{ - return - (::Math::Similar(x, other.x, epsilon)) && - (::Math::Similar(y, other.y, epsilon)) && - (::Math::Similar(z, other.z, epsilon)) && - (::Math::Similar(w, other.w, epsilon)) - ; -} - -template -Vector4 Vector4::ToInt() const -{ - return Vector4((int)x, (int)y, (int)z, (int)w); -} - -template -Vector4 Vector4::ToDouble() const -{ - return Vector4((double)x, (double)y, (double)z, (double)w); -} - -template -T& Vector4::operator[](std::size_t idx) -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - case 3: - return w; - default: - throw std::out_of_range("Array descriptor on Vector4 out of range!"); - } -} - -template -const T& Vector4::operator[](std::size_t idx) const -{ - switch (idx) - { - case 0: - return x; - case 1: - return y; - case 2: - return z; - case 3: - return w; - default: - throw std::out_of_range("Array descriptor on Vector4 out of range!"); - } -} - - - -// Good, optimized chad version for doubles -template<> -void Vector4::LerpSelf(const Vector4& other, double t) -{ - const double it = 1.0 - t; // Inverse t - - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - __m256d __t = _mm256_set1_pd(t); - __m256d __it = _mm256_set1_pd(it); // Inverse t - - // Procedure: - // (__vector_self * __it) + (__vector_other * __t) - - __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications - - __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); - __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); - - // Retrieve result, and apply it - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - w = sum[3]; - - #else - - x = it * x + t * other.x; - y = it * y + t * other.y; - z = it * z + t * other.z; - w = it * w + t * other.w; - - #endif - - return; -} - - - -// Slow, lame version for intcels -template<> -void Vector4::LerpSelf(const Vector4& other, double t) -{ - const double it = 1.0 - t; - - x = (int)(it * (double)x + t * (double)other.x); - y = (int)(it * (double)y + t * (double)other.y); - z = (int)(it * (double)z + t * (double)other.z); - w = (int)(it * (double)w + t * (double)other.w); - - return; -} - -template<> -Vector4 Vector4::Lerp(const Vector4& other, double t) const -{ - Vector4d copy(*this); - copy.LerpSelf(other, t); - - return copy; -} - -template<> -Vector4 Vector4::Lerp(const Vector4& other, double t) const -{ - Vector4d copy(this->ToDouble()); - copy.LerpSelf(other.ToDouble(), t); - - return copy; -} - - - -template<> -Vector4 Vector4::operator+(const Vector4& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - return Vector4( - sum[0], - sum[1], - sum[2], - sum[3] - ); - - #else - - return Vector4( - x + other.x, - y + other.y, - z + other.z, - w + other.w - ); - #endif -} - -template -Vector4 Vector4::operator+(const Vector4& other) const -{ - return Vector4( - x + other.x, - y + other.y, - z + other.z, - w + other.w - ); -} - - - -template<> -void Vector4::operator+=(const Vector4& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Add the components - __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double sum[4]; - _mm256_storeu_pd(sum, __sum); - - x = sum[0]; - y = sum[1]; - z = sum[2]; - w = sum[3]; - - #else - - x += other.x; - y += other.y; - z += other.z; - w += other.w; - - #endif - - return; -} - -template -void Vector4::operator+=(const Vector4& other) -{ - x += other.x; - y += other.y; - z += other.z; - w += other.w; - return; -} - - - -template<> -Vector4 Vector4::operator-(const Vector4& other) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and return these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - return Vector4( - diff[0], - diff[1], - diff[2], - diff[3] - ); - - #else - - return Vector4( - x - other.x, - y - other.y, - z - other.z, - w - other.w - ); - #endif -} - -template -Vector4 Vector4::operator-(const Vector4& other) const -{ - return Vector4( - x - other.x, - y - other.y, - z - other.z, - w - other.w - ); -} - - - -template<> -void Vector4::operator-=(const Vector4& other) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); - - // Subtract the components - __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); - - // Retrieve and apply these values - double diff[4]; - _mm256_storeu_pd(diff, __diff); - - x = diff[0]; - y = diff[1]; - z = diff[2]; - w = diff[3]; - - #else - - x -= other.x; - y -= other.y; - z -= other.z; - w -= other.w; - - #endif - - return; -} - -template -void Vector4::operator-=(const Vector4& other) -{ - x -= other.x; - y -= other.y; - z -= other.z; - w -= other.w; - return; -} - - - -template<> -Vector4 Vector4::operator*(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector4( - prod[0], - prod[1], - prod[2], - prod[3] - ); - - #else - - return Vector4( - x * scale, - y * scale, - z * scale, - w * scale - ); - - #endif -} - -template -Vector4 Vector4::operator*(const T scale) const -{ - return Vector4( - x * scale, - y * scale, - z * scale, - w * scale - ); -} - - - -template<> -void Vector4::operator*=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Multiply the components - __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x *= scale; - y *= scale; - z *= scale; - w *= scale; - - #endif - - return; -} - -template -void Vector4::operator*=(const T scale) -{ - x *= scale; - y *= scale; - z *= scale; - w *= scale; - return; -} - - - -template<> -Vector4 Vector4::operator/(const double scale) const -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and return these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - return Vector4( - prod[0], - prod[1], - prod[2], - prod[3] - ); - - #else - - return Vector4( - x / scale, - y / scale, - z / scale, - w / scale - ); - - #endif -} - -template -Vector4 Vector4::operator/(const T scale) const -{ - return Vector4( - x / scale, - y / scale, - z / scale, - w / scale - ); -} - - - -template<> -void Vector4::operator/=(const double scale) -{ - #ifndef _EULE_NO_INTRINSICS_ - - // Move vector components and factors into registers - __m256d __vector_self = _mm256_set_pd(w, z, y, x); - __m256d __scalar = _mm256_set1_pd(scale); - - // Divide the components - __m256d __prod = _mm256_div_pd(__vector_self, __scalar); - - // Retrieve and apply these values - double prod[4]; - _mm256_storeu_pd(prod, __prod); - - x = prod[0]; - y = prod[1]; - z = prod[2]; - w = prod[3]; - - #else - - x /= scale; - y /= scale; - z /= scale; - w /= scale; - - #endif - return; -} - -template -void Vector4::operator/=(const T scale) -{ - x /= scale; - y /= scale; - z /= scale; - w /= scale; - return; -} - - - -template -bool Vector4::operator==(const Vector4& other) const -{ - return - (x == other.x) && - (y == other.y) && - (z == other.z) && - (w == other.w); -} - - - -// Good, optimized chad version for doubles -template<> -Vector4 Vector4::operator*(const Matrix4x4& mat) const -{ - Vector4 newVec; - - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); - newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); - - return newVec; -} - -// Slow, lame version for intcels -template<> -Vector4 Vector4::operator*(const Matrix4x4& mat) const -{ - Vector4 newVec; - - newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); - newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); - newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); - newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); - - return Vector4( - (int)newVec.x, - (int)newVec.y, - (int)newVec.z, - (int)newVec.w - ); -} - - - -// Good, optimized chad version for doubles -template<> -void Vector4::operator*=(const Matrix4x4& mat) -{ - Vector4 buffer = *this; - - // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] - // idk right now. check that if something doesn't work - x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w); - y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w); - z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w); - w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w); - - return; -} - -template -Vector4 Vector4::operator-() const -{ - return Vector4( - -x, - -y, - -z, - -w - ); -} - -template -void Vector4::operator=(const Vector4& other) -{ - x = other.x; - y = other.y; - z = other.z; - w = other.w; - - return; -} - -template -void Vector4::operator=(Vector4&& other) noexcept -{ - x = std::move(other.x); - y = std::move(other.y); - z = std::move(other.z); - w = std::move(other.w); - - return; -} - -// Slow, lame version for intcels -template<> -void Vector4::operator*=(const Matrix4x4& mat) -{ - Vector4 buffer(x, y, z, w); - - // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] - // idk right now. check that if something doesn't work - x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w)); - y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w)); - z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w)); - w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w)); - - return; -} - -template -bool Vector4::operator!=(const Vector4& other) const -{ - return !operator==(other); -} - #include "Vector2.h" #include "Vector3.h" + template -Vector4::operator Vector2() const +Eule::Vector4::operator Eule::Vector2() const { return Vector2(x, y); } template -Vector4::operator Vector3() const +Eule::Vector4::operator Eule::Vector3() const { return Vector3(x, y, z); } - -template class Vector4; -template class Vector4; - -// Some handy predefines -template -const Vector4 Vector4::up(0, 1, 0, 0); -template -const Vector4 Vector4::down(0, -1, 0, 0); -template -const Vector4 Vector4::right(1, 0, 0, 0); -template -const Vector4 Vector4::left(-1, 0, 0, 0); -template -const Vector4 Vector4::forward(1, 0, 0, 0); -template -const Vector4 Vector4::backward(-1, 0, 0, 0); -template -const Vector4 Vector4::future(0, 0, 0, 1); -template -const Vector4 Vector4::past(0, 0, 0, -1); -template -const Vector4 Vector4::one(1, 1, 1, 1); -template -const Vector4 Vector4::zero(0, 0, 0, 0); diff --git a/Eule/Vector4.h b/Eule/Vector4.h index 0f34b4f..5472cf5 100644 --- a/Eule/Vector4.h +++ b/Eule/Vector4.h @@ -5,6 +5,22 @@ #include #include "Matrix4x4.h" +#include "Math.h" +#include + +//#define _EULE_NO_INTRINSICS_ +#ifndef _EULE_NO_INTRINSICS_ +#include +#endif + +/* + NOTE: + Here you will find bad, unoptimized methods for T=int. + This is because the compiler needs a method for each type in each instantiation of the template! + I can't generalize the methods when heavily optimizing for doubles. + These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. + The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. +*/ namespace Eule { template class Vector2; @@ -105,4 +121,803 @@ namespace Eule typedef Vector4 Vector4i; typedef Vector4 Vector4d; + + // Good, optimized chad version for doubles +template<> +double Vector4::SqrMagnitude() const +{ + return (x * x) + + (y * y) + + (z * z) + + (w * w); +} + +// Slow, lame version for intcels +template<> +double Vector4::SqrMagnitude() const +{ + int iSqrMag = x*x + y*y + z*z + w*w; + return (double)iSqrMag; +} + +template +double Vector4::Magnitude() const +{ + return sqrt(SqrMagnitude()); +} + + +template<> +Vector4 Vector4::VectorScale(const Vector4& scalar) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Load vectors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x); + + // Multiply them + __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); + + // Retrieve result + double result[4]; + _mm256_storeu_pd(result, __product); + + // Return value + return Vector4( + result[0], + result[1], + result[2], + result[3] + ); + + #else + + return Vector4( + x * scalar.x, + y * scalar.y, + z * scalar.z, + w * scalar.w + ); + #endif +} + + +template<> +Vector4 Vector4::VectorScale(const Vector4& scalar) const +{ + return Vector4( + x * scalar.x, + y * scalar.y, + z * scalar.z, + w * scalar.w + ); +} + + + +template +Vector4 Vector4::Normalize() const +{ + Vector4 norm(x, y, z, w); + norm.NormalizeSelf(); + + return norm; +} + +// Method to normalize a Vector4d +template<> +void Vector4::NormalizeSelf() +{ + double length = Magnitude(); + + // Prevent division by 0 + if (length == 0) + { + x = 0; + y = 0; + z = 0; + w = 0; + } + else + { + #ifndef _EULE_NO_INTRINSICS_ + + // Load vector and length into registers + __m256d __vec = _mm256_set_pd(w, z, y, x); + __m256d __len = _mm256_set1_pd(length); + + // Divide + __m256d __prod = _mm256_div_pd(__vec, __len); + + // Extract and set values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + + #else + + x /= length; + y /= length; + z /= length; + w /= length; + + #endif + } + + return; +} + +// You can't normalize an int vector, ffs! +// But we need an implementation for T=int +template<> +void Vector4::NormalizeSelf() +{ + std::cerr << "Stop normalizing int-vectors!!" << std::endl; + x = 0; + y = 0; + z = 0; + w = 0; + + return; +} + + + +template +bool Vector4::Similar(const Vector4& other, double epsilon) const +{ + return + (::Eule::Math::Similar(x, other.x, epsilon)) && + (::Eule::Math::Similar(y, other.y, epsilon)) && + (::Eule::Math::Similar(z, other.z, epsilon)) && + (::Eule::Math::Similar(w, other.w, epsilon)) + ; +} + +template +Vector4 Vector4::ToInt() const +{ + return Vector4((int)x, (int)y, (int)z, (int)w); +} + +template +Vector4 Vector4::ToDouble() const +{ + return Vector4((double)x, (double)y, (double)z, (double)w); +} + +template +T& Vector4::operator[](std::size_t idx) +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + default: + throw std::out_of_range("Array descriptor on Vector4 out of range!"); + } +} + +template +const T& Vector4::operator[](std::size_t idx) const +{ + switch (idx) + { + case 0: + return x; + case 1: + return y; + case 2: + return z; + case 3: + return w; + default: + throw std::out_of_range("Array descriptor on Vector4 out of range!"); + } +} + + + +// Good, optimized chad version for doubles +template<> +void Vector4::LerpSelf(const Vector4& other, double t) +{ + const double it = 1.0 - t; // Inverse t + + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + __m256d __t = _mm256_set1_pd(t); + __m256d __it = _mm256_set1_pd(it); // Inverse t + + // Procedure: + // (__vector_self * __it) + (__vector_other * __t) + + __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications + + __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); + __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); + + // Retrieve result, and apply it + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + w = sum[3]; + + #else + + x = it * x + t * other.x; + y = it * y + t * other.y; + z = it * z + t * other.z; + w = it * w + t * other.w; + + #endif + + return; +} + + + +// Slow, lame version for intcels +template<> +void Vector4::LerpSelf(const Vector4& other, double t) +{ + const double it = 1.0 - t; + + x = (int)(it * (double)x + t * (double)other.x); + y = (int)(it * (double)y + t * (double)other.y); + z = (int)(it * (double)z + t * (double)other.z); + w = (int)(it * (double)w + t * (double)other.w); + + return; +} + +template<> +Vector4 Vector4::Lerp(const Vector4& other, double t) const +{ + Vector4d copy(*this); + copy.LerpSelf(other, t); + + return copy; +} + +template<> +Vector4 Vector4::Lerp(const Vector4& other, double t) const +{ + Vector4d copy(this->ToDouble()); + copy.LerpSelf(other.ToDouble(), t); + + return copy; +} + + + +template<> +Vector4 Vector4::operator+(const Vector4& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + return Vector4( + sum[0], + sum[1], + sum[2], + sum[3] + ); + + #else + + return Vector4( + x + other.x, + y + other.y, + z + other.z, + w + other.w + ); + #endif +} + +template +Vector4 Vector4::operator+(const Vector4& other) const +{ + return Vector4( + x + other.x, + y + other.y, + z + other.z, + w + other.w + ); +} + + + +template<> +void Vector4::operator+=(const Vector4& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Add the components + __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double sum[4]; + _mm256_storeu_pd(sum, __sum); + + x = sum[0]; + y = sum[1]; + z = sum[2]; + w = sum[3]; + + #else + + x += other.x; + y += other.y; + z += other.z; + w += other.w; + + #endif + + return; +} + +template +void Vector4::operator+=(const Vector4& other) +{ + x += other.x; + y += other.y; + z += other.z; + w += other.w; + return; +} + + + +template<> +Vector4 Vector4::operator-(const Vector4& other) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and return these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + return Vector4( + diff[0], + diff[1], + diff[2], + diff[3] + ); + + #else + + return Vector4( + x - other.x, + y - other.y, + z - other.z, + w - other.w + ); + #endif +} + +template +Vector4 Vector4::operator-(const Vector4& other) const +{ + return Vector4( + x - other.x, + y - other.y, + z - other.z, + w - other.w + ); +} + + + +template<> +void Vector4::operator-=(const Vector4& other) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x); + + // Subtract the components + __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); + + // Retrieve and apply these values + double diff[4]; + _mm256_storeu_pd(diff, __diff); + + x = diff[0]; + y = diff[1]; + z = diff[2]; + w = diff[3]; + + #else + + x -= other.x; + y -= other.y; + z -= other.z; + w -= other.w; + + #endif + + return; +} + +template +void Vector4::operator-=(const Vector4& other) +{ + x -= other.x; + y -= other.y; + z -= other.z; + w -= other.w; + return; +} + + + +template<> +Vector4 Vector4::operator*(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector4( + prod[0], + prod[1], + prod[2], + prod[3] + ); + + #else + + return Vector4( + x * scale, + y * scale, + z * scale, + w * scale + ); + + #endif +} + +template +Vector4 Vector4::operator*(const T scale) const +{ + return Vector4( + x * scale, + y * scale, + z * scale, + w * scale + ); +} + + + +template<> +void Vector4::operator*=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Multiply the components + __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + + #else + + x *= scale; + y *= scale; + z *= scale; + w *= scale; + + #endif + + return; +} + +template +void Vector4::operator*=(const T scale) +{ + x *= scale; + y *= scale; + z *= scale; + w *= scale; + return; +} + + + +template<> +Vector4 Vector4::operator/(const double scale) const +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and return these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + return Vector4( + prod[0], + prod[1], + prod[2], + prod[3] + ); + + #else + + return Vector4( + x / scale, + y / scale, + z / scale, + w / scale + ); + + #endif +} + +template +Vector4 Vector4::operator/(const T scale) const +{ + return Vector4( + x / scale, + y / scale, + z / scale, + w / scale + ); +} + + + +template<> +void Vector4::operator/=(const double scale) +{ + #ifndef _EULE_NO_INTRINSICS_ + + // Move vector components and factors into registers + __m256d __vector_self = _mm256_set_pd(w, z, y, x); + __m256d __scalar = _mm256_set1_pd(scale); + + // Divide the components + __m256d __prod = _mm256_div_pd(__vector_self, __scalar); + + // Retrieve and apply these values + double prod[4]; + _mm256_storeu_pd(prod, __prod); + + x = prod[0]; + y = prod[1]; + z = prod[2]; + w = prod[3]; + + #else + + x /= scale; + y /= scale; + z /= scale; + w /= scale; + + #endif + return; +} + +template +void Vector4::operator/=(const T scale) +{ + x /= scale; + y /= scale; + z /= scale; + w /= scale; + return; +} + + + +template +bool Vector4::operator==(const Vector4& other) const +{ + return + (x == other.x) && + (y == other.y) && + (z == other.z) && + (w == other.w); +} + + + +// Good, optimized chad version for doubles +template<> +Vector4 Vector4::operator*(const Matrix4x4& mat) const +{ + Vector4 newVec; + + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); + newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); + + return newVec; +} + +// Slow, lame version for intcels +template<> +Vector4 Vector4::operator*(const Matrix4x4& mat) const +{ + Vector4 newVec; + + newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w); + newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w); + newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w); + newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w); + + return Vector4( + (int)newVec.x, + (int)newVec.y, + (int)newVec.z, + (int)newVec.w + ); +} + + + +// Good, optimized chad version for doubles +template<> +void Vector4::operator*=(const Matrix4x4& mat) +{ + Vector4 buffer = *this; + + // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] + // idk right now. check that if something doesn't work + x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w); + y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w); + z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w); + w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w); + + return; +} + +template +Vector4 Vector4::operator-() const +{ + return Vector4( + -x, + -y, + -z, + -w + ); +} + +template +void Vector4::operator=(const Vector4& other) +{ + x = other.x; + y = other.y; + z = other.z; + w = other.w; + + return; +} + +template +void Vector4::operator=(Vector4&& other) noexcept +{ + x = std::move(other.x); + y = std::move(other.y); + z = std::move(other.z); + w = std::move(other.w); + + return; +} + +// Slow, lame version for intcels +template<> +void Vector4::operator*=(const Matrix4x4& mat) +{ + Vector4 buffer(x, y, z, w); + + // Should this still be reversed...? like, instead of mat[x][y], use mat[y][m] + // idk right now. check that if something doesn't work + x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w)); + y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w)); + z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w)); + w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w)); + + return; +} + +template +bool Vector4::operator!=(const Vector4& other) const +{ + return !operator==(other); +} + +template class Vector4; +template class Vector4; + +// Some handy predefines +template +const Vector4 Vector4::up(0, 1, 0, 0); +template +const Vector4 Vector4::down(0, -1, 0, 0); +template +const Vector4 Vector4::right(1, 0, 0, 0); +template +const Vector4 Vector4::left(-1, 0, 0, 0); +template +const Vector4 Vector4::forward(1, 0, 0, 0); +template +const Vector4 Vector4::backward(-1, 0, 0, 0); +template +const Vector4 Vector4::future(0, 0, 0, 1); +template +const Vector4 Vector4::past(0, 0, 0, -1); +template +const Vector4 Vector4::one(1, 1, 1, 1); +template +const Vector4 Vector4::zero(0, 0, 0, 0); + } diff --git a/Eule/version.h b/Eule/version.h index c4dc0de..526875d 100644 --- a/Eule/version.h +++ b/Eule/version.h @@ -1,2 +1,2 @@ #pragma once -#define EULE_VERSION (0.1) \ No newline at end of file +#define EULE_VERSION (0.11) \ No newline at end of file