#pragma once /*** ./../Eule/Matrix4x4.h ***/ #pragma once #include #include #include namespace Eule { template class Vector3; typedef Vector3 Vector3d; /** A matrix 4x4 class representing a 3d transformation. * This matrix consists of a 3x3 matrix containing scaling and rotation information, and a vector (d,h,l) * representing the translation. * * ``` * myMatrix[y][x] = 3 * * X ==============> * Y * | # # # # # # # # # # # * | # a b c d # * | # # * | # e f g h # * | # # * V # i j k l # * # # * # m n o p # * # # # # # # # # # # # * * ``` * * Note: This class can also be used to compute regular 4x4 multiplications. Use Multiply4x4() for that. */ class Matrix4x4 { public: Matrix4x4(); Matrix4x4(const Matrix4x4& other); Matrix4x4(Matrix4x4&& other) noexcept; //! Array holding the matrices values std::array, 4> v; Matrix4x4 operator*(const Matrix4x4& other) const; void operator*=(const Matrix4x4& other); Matrix4x4 operator/(const Matrix4x4& other) const; void operator/=(const Matrix4x4& other); //! Cellwise scaling Matrix4x4 operator*(const double scalar) const; //! Cellwise scaling void operator*=(const double scalar); //! Cellwise division Matrix4x4 operator/(const double denominator) const; //! Cellwise division void operator/=(const double denominator); //! Cellwise addition Matrix4x4 operator+(const Matrix4x4& other) const; //! Cellwise addition void operator+=(const Matrix4x4& other); //! Cellwise subtraction Matrix4x4 operator-(const Matrix4x4& other) const; //! Cellwise subtraction void operator-=(const Matrix4x4& other); std::array& operator[](std::size_t y); const std::array& operator[](std::size_t y) const; void operator=(const Matrix4x4& other); void operator=(Matrix4x4&& other) noexcept; bool operator==(const Matrix4x4& other); bool operator==(const Matrix4x4& other) const; bool operator!=(const Matrix4x4& other); bool operator!=(const Matrix4x4& other) const; //! Will return d,h,l as a Vector3d(x,y,z) const Vector3d GetTranslationComponent() const; //! Will set d,h,l from a Vector3d(x,y,z) void SetTranslationComponent(const Vector3d& trans); //! Will return this Matrix4x4 with d,h,l being set to 0 Matrix4x4 DropTranslationComponents() const; //! Will return the 3x3 transpose of this matrix Matrix4x4 Transpose3x3() const; //! Will return the 4x4 transpose of this matrix Matrix4x4 Transpose4x4() const; //! Will return the Matrix4x4 of an actual 4x4 multiplication. operator* only does a 3x3 Matrix4x4 Multiply4x4(const Matrix4x4& o) const; //! Will return the cofactors of this matrix, by dimension n Matrix4x4 GetCofactors(std::size_t p, std::size_t q, std::size_t n) const; //! Will return the determinant, by dimension n double Determinant(std::size_t n) const; //! Will return the adjoint of this matrix, by dimension n Matrix4x4 Adjoint(std::size_t n) const; //! Will return the 3x3-inverse of this matrix. //! Meaning, the 3x3 component will be inverted, and the translation component will be negated Matrix4x4 Inverse3x3() const; //! Will return the full 4x4-inverse of this matrix Matrix4x4 Inverse4x4() const; //! Will check if the 3x3-component is inversible bool IsInversible3x3() const; //! Will check if the entire matrix is inversible bool IsInversible4x4() const; //! Will compare if two matrices are similar to a certain epsilon value bool Similar(const Matrix4x4& other, double epsilon = 0.00001) const; friend std::ostream& operator<< (std::ostream& os, const Matrix4x4& m); friend std::wostream& operator<< (std::wostream& os, const Matrix4x4& m); // Shorthands double& a = v[0][0]; double& b = v[0][1]; double& c = v[0][2]; double& d = v[0][3]; double& e = v[1][0]; double& f = v[1][1]; double& g = v[1][2]; double& h = v[1][3]; double& i = v[2][0]; double& j = v[2][1]; double& k = v[2][2]; double& l = v[2][3]; double& m = v[3][0]; double& n = v[3][1]; double& o = v[3][2]; double& p = v[3][3]; }; } /*** ./../Eule/Math.h ***/ #pragma once #include namespace Eule { /** Math utility class containing basic functions. */ class Math { public: //! Will return the bigger of two values [[nodiscard]] static constexpr double Max(const double a, const double b); //! Will return the smaller of two values [[nodiscard]] static constexpr double Min(const double a, const double b); //! Will return `v`, but at least `min`, and at most `max` [[nodiscard]] static constexpr double Clamp(const double v, const double min, const double max); //! Will return the linear interpolation between `a` and `b` by `t` [[nodiscard]] static constexpr double Lerp(double a, double b, double t); //! Will return the absolute value of `a` [[nodiscard]] static constexpr double Abs(const double a); //! Compares two double values with a given accuracy [[nodiscard]] static constexpr bool Similar(const double a, const double b, const double epsilon = 0.00001); //! Will compute the actual modulo of a fraction. The % operator returns bs for n<0. //! May throw division-by-zero std::logic_error [[nodiscard]] static int Mod(const int numerator, const int denominator); //! Kind of like \f$sin(counter)\f$, but it oscillates over \f$[a,b]\f$ instead of \f$[-1,1]\f$, by a given speed. //! Given that \f$speed = 1\f$, the result will always be `a` if `counter` is even, and `b` if `counter` is uneven. //! If `counter` is a rational, the result will oscillate between `a` and `b`, like `sin()` does. //! If you increase `speed`, the oscillation frequency will increase. Meaning \f$speed = 2\f$ would result in \f$counter=0.5\f$ returning `b`. static double Oscillate(const double a, const double b, const double counter, const double speed); private: // No instanciation! >:( Math(); }; /* These are just the inline methods. They have to lie in the header file. */ /* The more sophisticated methods are in the .cpp */ constexpr inline double Math::Max(double a, double b) { return (a > b) ? a : b; } constexpr inline double Math::Min(double a, double b) { return (a < b) ? a : b; } constexpr inline double Math::Clamp(double v, double min, double max) { return Max(Min(v, max), min); } constexpr inline double Math::Lerp(double a, double b, double t) { const double it = 1.0 - t; return (a * it) + (b * t); } constexpr inline double Math::Abs(const double a) { return (a > 0.0) ? a : -a; } constexpr inline bool Math::Similar(const double a, const double b, const double epsilon) { return Abs(a - b) <= epsilon; } } /*** ./../Eule/Vector2.h ***/ #pragma once #include #include #include //#define _EULE_NO_INTRINSICS_ #ifndef _EULE_NO_INTRINSICS_ #include #endif /* NOTE: Here you will find bad, unoptimized methods for T=int. This is because the compiler needs a method for each type in each instantiation of the template! I can't generalize the methods when heavily optimizing for doubles. These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. */ namespace Eule { template class Vector3; template class Vector4; /** Representation of a 2d vector. * Contains a lot of utility methods. */ template class Vector2 { public: Vector2() : x{ 0 }, y{ 0 } {} Vector2(T _x, T _y) : x{ _x }, y{ _y } {} Vector2(const Vector2& other) = default; Vector2(Vector2&& other) noexcept = default; //! Will compute the dot product to another Vector2 double DotProduct(const Vector2& other) const; //! Will compute the cross product to another Vector2 double CrossProduct(const Vector2& other) const; //! Will compute the square magnitude double SqrMagnitude() const; //! Will compute the magnitude double Magnitude() const; //! Will return the normalization of this vector [[nodiscard]] Vector2 Normalize() const; //! Will normalize this vector void NormalizeSelf(); //! Will scale self.n by scalar.n Vector2 VectorScale(const Vector2& scalar) const; //! Will lerp itself towards other by t void LerpSelf(const Vector2& other, double t); //! Will return a lerp result between this and another vector [[nodiscard]] Vector2 Lerp(const Vector2& other, double t) const; //! Will compare if two vectors are similar to a certain epsilon value [[nodiscard]] bool Similar(const Vector2& other, double epsilon = 0.00001) const; //! Will convert this vector to a Vector2i [[nodiscard]] Vector2 ToInt() const; //! Will convert this vector to a Vector2d [[nodiscard]] Vector2 ToDouble() const; T& operator[](std::size_t idx); const T& operator[](std::size_t idx) const; Vector2 operator+(const Vector2& other) const; void operator+=(const Vector2& other); Vector2 operator-(const Vector2& other) const; void operator-=(const Vector2& other); Vector2 operator*(const T scale) const; void operator*=(const T scale); Vector2 operator/(const T scale) const; void operator/=(const T scale); Vector2 operator-() const; operator Vector3() const; //! Conversion method operator Vector4() const; //! Conversion method void operator=(const Vector2& other); void operator=(Vector2&& other) noexcept; bool operator==(const Vector2& other) const; bool operator!=(const Vector2& other) const; friend std::ostream& operator<< (std::ostream& os, const Vector2& v) { return os << "[x: " << v.x << " y: " << v.y << "]"; } friend std::wostream& operator<< (std::wostream& os, const Vector2& v) { return os << L"[x: " << v.x << L" y: " << v.y << L"]"; } T x; T y; // Some handy predefines static const Vector2 up; static const Vector2 down; static const Vector2 right; static const Vector2 left; static const Vector2 one; static const Vector2 zero; }; typedef Vector2 Vector2i; typedef Vector2 Vector2d; // Good, optimized chad version for doubles template<> double Vector2::DotProduct(const Vector2& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components into registers __m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x); __m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x); // Define bitmask, and execute computation const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); // Retrieve result, and return it float result[8]; _mm256_storeu_ps(result, __dot); return result[0]; #else return (x * other.x) + (y * other.y); #endif } // Slow, lame version for intcels template<> double Vector2::DotProduct(const Vector2& other) const { int iDot = (x * other.x) + (y * other.y); return (double)iDot; } // Good, optimized chad version for doubles template<> double Vector2::CrossProduct(const Vector2& other) const { return (x * other.y) - (y * other.x); } // Slow, lame version for intcels template<> double Vector2::CrossProduct(const Vector2& other) const { int iCross = (x * other.y) - (y * other.x); return (double)iCross; } // Good, optimized chad version for doubles template<> double Vector2::SqrMagnitude() const { // x.DotProduct(x) == x.SqrMagnitude() return DotProduct(*this); } // Slow, lame version for intcels template<> double Vector2::SqrMagnitude() const { int iSqrMag = x*x + y*y; return (double)iSqrMag; } template double Vector2::Magnitude() const { return sqrt(SqrMagnitude()); } template<> Vector2 Vector2::VectorScale(const Vector2& scalar) const { #ifndef _EULE_NO_INTRINSICS_ // Load vectors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x); // Multiply them __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); // Retrieve result double result[4]; _mm256_storeu_pd(result, __product); // Return value return Vector2( result[0], result[1] ); #else return Vector2( x * scalar.x, y * scalar.y ); #endif } template<> Vector2 Vector2::VectorScale(const Vector2& scalar) const { return Vector2( x * scalar.x, y * scalar.y ); } template Vector2 Vector2::Normalize() const { Vector2 norm(x, y); norm.NormalizeSelf(); return norm; } // Method to normalize a Vector2d template<> void Vector2::NormalizeSelf() { double length = Magnitude(); // Prevent division by 0 if (length == 0) { x = 0; y = 0; } else { #ifndef _EULE_NO_INTRINSICS_ // Load vector and length into registers __m256d __vec = _mm256_set_pd(0, 0, y, x); __m256d __len = _mm256_set1_pd(length); // Divide __m256d __prod = _mm256_div_pd(__vec, __len); // Extract and set values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; #else x /= length; y /= length; #endif } return; } // You can't normalize an int vector, ffs! // But we need an implementation for T=int template<> void Vector2::NormalizeSelf() { std::cerr << "Stop normalizing int-vectors!!" << std::endl; x = 0; y = 0; return; } // Good, optimized chad version for doubles template<> void Vector2::LerpSelf(const Vector2& other, double t) { const double it = 1.0 - t; // Inverse t #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); __m256d __t = _mm256_set1_pd(t); __m256d __it = _mm256_set1_pd(it); // Inverse t // Procedure: // (__vector_self * __it) + (__vector_other * __t) __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); // Retrieve result, and apply it double sum[4]; _mm256_storeu_pd(sum, __sum); x = sum[0]; y = sum[1]; #else x = it * x + t * other.x; y = it * y + t * other.y; #endif return; } // Slow, lame version for intcels template<> void Vector2::LerpSelf(const Vector2& other, double t) { const double it = 1.0 - t; // Inverse t x = (int)(it * (double)x + t * (double)other.x); y = (int)(it * (double)y + t * (double)other.y); return; } template<> Vector2 Vector2::Lerp(const Vector2& other, double t) const { Vector2d copy(*this); copy.LerpSelf(other, t); return copy; } template<> Vector2 Vector2::Lerp(const Vector2& other, double t) const { Vector2d copy(this->ToDouble()); copy.LerpSelf(other.ToDouble(), t); return copy; } template T& Vector2::operator[](std::size_t idx) { switch (idx) { case 0: return x; case 1: return y; default: throw std::out_of_range("Array descriptor on Vector2 out of range!"); } } template const T& Vector2::operator[](std::size_t idx) const { switch (idx) { case 0: return x; case 1: return y; default: throw std::out_of_range("Array descriptor on Vector2 out of range!"); } } template bool Vector2::Similar(const Vector2& other, double epsilon) const { return (::Eule::Math::Similar(x, other.x, epsilon)) && (::Eule::Math::Similar(y, other.y, epsilon)) ; } template Vector2 Vector2::ToInt() const { return Vector2((int)x, (int)y); } template Vector2 Vector2::ToDouble() const { return Vector2((double)x, (double)y); } template<> Vector2 Vector2::operator+(const Vector2& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); // Add the components __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); // Retrieve and return these values double sum[4]; _mm256_storeu_pd(sum, __sum); return Vector2( sum[0], sum[1] ); #else return Vector2( x + other.x, y + other.y ); #endif } template Vector2 Vector2::operator+(const Vector2& other) const { return Vector2( x + other.x, y + other.y ); } template<> void Vector2::operator+=(const Vector2& other) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); // Add the components __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); // Retrieve and apply these values double sum[4]; _mm256_storeu_pd(sum, __sum); x = sum[0]; y = sum[1]; #else x += other.x; y += other.y; #endif return; } template void Vector2::operator+=(const Vector2& other) { x += other.x; y += other.y; return; } template<> Vector2 Vector2::operator-(const Vector2& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); // Subtract the components __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); // Retrieve and return these values double diff[4]; _mm256_storeu_pd(diff, __diff); return Vector2( diff[0], diff[1] ); #else return Vector2( x - other.x, y - other.y ); #endif } template Vector2 Vector2::operator-(const Vector2& other) const { return Vector2( x - other.x, y - other.y ); } template<> void Vector2::operator-=(const Vector2& other) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x); // Subtract the components __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); // Retrieve and apply these values double diff[4]; _mm256_storeu_pd(diff, __diff); x = diff[0]; y = diff[1]; #else x -= other.x; y -= other.y; #endif return; } template void Vector2::operator-=(const Vector2& other) { x -= other.x; y -= other.y; return; } template<> Vector2 Vector2::operator*(const double scale) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Multiply the components __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); // Retrieve and return these values double prod[4]; _mm256_storeu_pd(prod, __prod); return Vector2( prod[0], prod[1] ); #else return Vector2( x * scale, y * scale ); #endif } template Vector2 Vector2::operator*(const T scale) const { return Vector2( x * scale, y * scale ); } template<> void Vector2::operator*=(const double scale) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Multiply the components __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); // Retrieve and apply these values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; #else x *= scale; y *= scale; #endif return; } template void Vector2::operator*=(const T scale) { x *= scale; y *= scale; return; } template<> Vector2 Vector2::operator/(const double scale) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Divide the components __m256d __prod = _mm256_div_pd(__vector_self, __scalar); // Retrieve and return these values double prod[4]; _mm256_storeu_pd(prod, __prod); return Vector2( prod[0], prod[1] ); #else return Vector2( x / scale, y / scale ); #endif } template Vector2 Vector2::operator/(const T scale) const { return Vector2( x / scale, y / scale ); } template<> void Vector2::operator/=(const double scale) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, 0, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Divide the components __m256d __prod = _mm256_div_pd(__vector_self, __scalar); // Retrieve and apply these values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; #else x /= scale; y /= scale; #endif return; } template void Vector2::operator/=(const T scale) { x /= scale; y /= scale; return; } template void Vector2::operator=(const Vector2& other) { x = other.x; y = other.y; return; } template void Vector2::operator=(Vector2&& other) noexcept { x = std::move(other.x); y = std::move(other.y); return; } template bool Vector2::operator==(const Vector2& other) const { return (x == other.x) && (y == other.y); } template bool Vector2::operator!=(const Vector2& other) const { return !operator==(other); } template Vector2 Vector2::operator-() const { return Vector2( -x, -y ); } template class Vector2; template class Vector2; // Some handy predefines template const Vector2 Vector2::up(0, 1); template const Vector2 Vector2::down(0, -1); template const Vector2 Vector2::right(1, 0); template const Vector2 Vector2::left(-1, 0); template const Vector2 Vector2::one(1, 1); template const Vector2 Vector2::zero(0, 0); } /*** ./../Eule/Vector3.h ***/ #pragma once #include #include #include #include #include //#define _EULE_NO_INTRINSICS_ #ifndef _EULE_NO_INTRINSICS_ #include #endif /* NOTE: Here you will find bad, unoptimized methods for T=int. This is because the compiler needs a method for each type in each instantiation of the template! I can't generalize the methods when heavily optimizing for doubles. These functions will get called VERY rarely, if ever at all, for T=int, so it's ok. The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus. */ namespace Eule { template class Vector2; template class Vector4; /** Representation of a 3d vector. * Contains a lot of utility methods. */ template class Vector3 { public: Vector3() : x{ 0 }, y{ 0 }, z{ 0 } {} Vector3(T _x, T _y, T _z) : x{ _x }, y{ _y }, z{ _z } {} Vector3(const Vector3& other) = default; Vector3(Vector3&& other) noexcept = default; //! Will compute the dot product to another Vector3 double DotProduct(const Vector3& other) const; //! Will compute the cross product to another Vector3 Vector3 CrossProduct(const Vector3& other) const; //! Will compute the square magnitude double SqrMagnitude() const; //! Will compute the magnitude double Magnitude() const; //! Will return the normalization of this vector [[nodiscard]] Vector3 Normalize() const; //! Will normalize this vector void NormalizeSelf(); //! Will scale self.n by scalar.n [[nodiscard]] Vector3 VectorScale(const Vector3& scalar) const; //! Will lerp itself towards other by t void LerpSelf(const Vector3& other, double t); //! Will return a lerp result between this and another vector [[nodiscard]] Vector3 Lerp(const Vector3& other, double t) const; //! Will compare if two vectors are similar to a certain epsilon value [[nodiscard]] bool Similar(const Vector3& other, double epsilon = 0.00001) const; //! Will convert this vector to a Vector3i [[nodiscard]] Vector3 ToInt() const; //! Will convert this vector to a Vector3d [[nodiscard]] Vector3 ToDouble() const; T& operator[](std::size_t idx); const T& operator[](std::size_t idx) const; Vector3 operator+(const Vector3& other) const; void operator+=(const Vector3& other); Vector3 operator-(const Vector3& other) const; void operator-=(const Vector3& other); Vector3 operator*(const T scale) const; void operator*=(const T scale); Vector3 operator/(const T scale) const; void operator/=(const T scale); Vector3 operator*(const Matrix4x4& mat) const; void operator*=(const Matrix4x4& mat); Vector3 operator-() const; operator Vector2() const; //! Conversion method operator Vector4() const; //! Conversion method void operator=(const Vector3& other); void operator=(Vector3&& other) noexcept; bool operator==(const Vector3& other) const; bool operator!=(const Vector3& other) const; friend std::ostream& operator << (std::ostream& os, const Vector3& v) { return os << "[x: " << v.x << " y: " << v.y << " z: " << v.z << "]"; } friend std::wostream& operator << (std::wostream& os, const Vector3& v) { return os << L"[x: " << v.x << L" y: " << v.y << L" z: " << v.z << L"]"; } T x; T y; T z; // Some handy predefines static const Vector3 up; static const Vector3 down; static const Vector3 right; static const Vector3 left; static const Vector3 forward; static const Vector3 backward; static const Vector3 one; static const Vector3 zero; }; typedef Vector3 Vector3i; typedef Vector3 Vector3d; // Good, optimized chad version for doubles template<> double Vector3::DotProduct(const Vector3& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components into registers __m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x); __m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x); // Define bitmask, and execute computation const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot __m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask); // Retrieve result, and return it float result[8]; _mm256_storeu_ps(result, __dot); return result[0]; #else return (x * other.x) + (y * other.y) + (z * other.z); #endif } // Slow, lame version for intcels template<> double Vector3::DotProduct(const Vector3& other) const { int iDot = (x * other.x) + (y * other.y) + (z * other.z); return (double)iDot; } // Good, optimized chad version for doubles template<> Vector3 Vector3::CrossProduct(const Vector3& other) const { Vector3 cp; cp.x = (y * other.z) - (z * other.y); cp.y = (z * other.x) - (x * other.z); cp.z = (x * other.y) - (y * other.x); return cp; } // Slow, lame version for intcels template<> Vector3 Vector3::CrossProduct(const Vector3& other) const { Vector3 cp; cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y); cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z); cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x); return cp; } // Good, optimized chad version for doubles template<> double Vector3::SqrMagnitude() const { // x.DotProduct(x) == x.SqrMagnitude() return DotProduct(*this); } // Slow, lame version for intcels template<> double Vector3::SqrMagnitude() const { int iSqrMag = x*x + y*y + z*z; return (double)iSqrMag; } template double Vector3::Magnitude() const { return sqrt(SqrMagnitude()); } template<> Vector3 Vector3::VectorScale(const Vector3& scalar) const { #ifndef _EULE_NO_INTRINSICS_ // Load vectors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x); // Multiply them __m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar); // Retrieve result double result[4]; _mm256_storeu_pd(result, __product); // Return value return Vector3( result[0], result[1], result[2] ); #else return Vector3( x * scalar.x, y * scalar.y, z * scalar.z ); #endif } template<> Vector3 Vector3::VectorScale(const Vector3& scalar) const { return Vector3( x * scalar.x, y * scalar.y, z * scalar.z ); } template Vector3 Vector3::Normalize() const { Vector3 norm(x, y, z); norm.NormalizeSelf(); return norm; } // Method to normalize a Vector3d template<> void Vector3::NormalizeSelf() { const double length = Magnitude(); // Prevent division by 0 if (length == 0) { x = 0; y = 0; z = 0; } else { #ifndef _EULE_NO_INTRINSICS_ // Load vector and length into registers __m256d __vec = _mm256_set_pd(0, z, y, x); __m256d __len = _mm256_set1_pd(length); // Divide __m256d __prod = _mm256_div_pd(__vec, __len); // Extract and set values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; z = prod[2]; #else x /= length; y /= length; z /= length; #endif } return; } // You can't normalize an int vector, ffs! // But we need an implementation for T=int template<> void Vector3::NormalizeSelf() { std::cerr << "Stop normalizing int-vectors!!" << std::endl; x = 0; y = 0; z = 0; return; } template bool Vector3::Similar(const Vector3& other, double epsilon) const { return (::Eule::Math::Similar(x, other.x, epsilon)) && (::Eule::Math::Similar(y, other.y, epsilon)) && (::Eule::Math::Similar(z, other.z, epsilon)) ; } template Vector3 Vector3::ToInt() const { return Vector3((int)x, (int)y, (int)z); } template Vector3 Vector3::ToDouble() const { return Vector3((double)x, (double)y, (double)z); } template T& Vector3::operator[](std::size_t idx) { switch (idx) { case 0: return x; case 1: return y; case 2: return z; default: throw std::out_of_range("Array descriptor on Vector3 out of range!"); } } template const T& Vector3::operator[](std::size_t idx) const { switch (idx) { case 0: return x; case 1: return y; case 2: return z; default: throw std::out_of_range("Array descriptor on Vector3 out of range!"); } } // Good, optimized chad version for doubles template<> void Vector3::LerpSelf(const Vector3& other, double t) { const double it = 1.0 - t; // Inverse t #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); __m256d __t = _mm256_set1_pd(t); __m256d __it = _mm256_set1_pd(it); // Inverse t // Procedure: // (__vector_self * __it) + (__vector_other * __t) __m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications __sum = _mm256_fmadd_pd(__vector_self, __it, __sum); __sum = _mm256_fmadd_pd(__vector_other, __t, __sum); // Retrieve result, and apply it double sum[4]; _mm256_storeu_pd(sum, __sum); x = sum[0]; y = sum[1]; z = sum[2]; #else x = it*x + t*other.x; y = it*y + t*other.y; z = it*z + t*other.z; #endif return; } // Slow, lame version for intcels template<> void Vector3::LerpSelf(const Vector3& other, double t) { const double it = 1.0 - t; // Inverse t x = (int)(it * (double)x + t * (double)other.x); y = (int)(it * (double)y + t * (double)other.y); z = (int)(it * (double)z + t * (double)other.z); return; } template<> Vector3 Vector3::Lerp(const Vector3& other, double t) const { Vector3d copy(*this); copy.LerpSelf(other, t); return copy; } template<> Vector3 Vector3::Lerp(const Vector3& other, double t) const { Vector3d copy(this->ToDouble()); copy.LerpSelf(other.ToDouble(), t); return copy; } template<> Vector3 Vector3::operator+(const Vector3& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); // Add the components __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); // Retrieve and return these values double sum[4]; _mm256_storeu_pd(sum, __sum); return Vector3( sum[0], sum[1], sum[2] ); #else return Vector3( x + other.x, y + other.y, z + other.z ); #endif } template Vector3 Vector3::operator+(const Vector3& other) const { return Vector3( x + other.x, y + other.y, z + other.z ); } template<> void Vector3::operator+=(const Vector3& other) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); // Add the components __m256d __sum = _mm256_add_pd(__vector_self, __vector_other); // Retrieve and apply these values double sum[4]; _mm256_storeu_pd(sum, __sum); x = sum[0]; y = sum[1]; z = sum[2]; #else x += other.x; y += other.y; z += other.z; #endif return; } template void Vector3::operator+=(const Vector3& other) { x += other.x; y += other.y; z += other.z; return; } template<> Vector3 Vector3::operator-(const Vector3& other) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); // Subtract the components __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); // Retrieve and return these values double diff[4]; _mm256_storeu_pd(diff, __diff); return Vector3( diff[0], diff[1], diff[2] ); #else return Vector3( x - other.x, y - other.y, z - other.z ); #endif } template Vector3 Vector3::operator-(const Vector3& other) const { return Vector3( x - other.x, y - other.y, z - other.z ); } template<> void Vector3::operator-=(const Vector3& other) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x); // Subtract the components __m256d __diff = _mm256_sub_pd(__vector_self, __vector_other); // Retrieve and apply these values double diff[4]; _mm256_storeu_pd(diff, __diff); x = diff[0]; y = diff[1]; z = diff[2]; #else x -= other.x; y -= other.y; z -= other.z; #endif return; } template void Vector3::operator-=(const Vector3& other) { x -= other.x; y -= other.y; z -= other.z; return; } template<> Vector3 Vector3::operator*(const double scale) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Multiply the components __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); // Retrieve and return these values double prod[4]; _mm256_storeu_pd(prod, __prod); return Vector3( prod[0], prod[1], prod[2] ); #else return Vector3( x * scale, y * scale, z * scale ); #endif } template Vector3 Vector3::operator*(const T scale) const { return Vector3( x * scale, y * scale, z * scale ); } template<> void Vector3::operator*=(const double scale) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Multiply the components __m256d __prod = _mm256_mul_pd(__vector_self, __scalar); // Retrieve and apply these values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; z = prod[2]; #else x *= scale; y *= scale; z *= scale; #endif return; } template void Vector3::operator*=(const T scale) { x *= scale; y *= scale; z *= scale; return; } template<> Vector3 Vector3::operator/(const double scale) const { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Divide the components __m256d __prod = _mm256_div_pd(__vector_self, __scalar); // Retrieve and return these values double prod[4]; _mm256_storeu_pd(prod, __prod); return Vector3( prod[0], prod[1], prod[2] ); #else return Vector3( x / scale, y / scale, z / scale ); #endif } template Vector3 Vector3::operator/(const T scale) const { return Vector3( x / scale, y / scale, z / scale ); } template<> void Vector3::operator/=(const double scale) { #ifndef _EULE_NO_INTRINSICS_ // Move vector components and factors into registers __m256d __vector_self = _mm256_set_pd(0, z, y, x); __m256d __scalar = _mm256_set1_pd(scale); // Divide the components __m256d __prod = _mm256_div_pd(__vector_self, __scalar); // Retrieve and apply these values double prod[4]; _mm256_storeu_pd(prod, __prod); x = prod[0]; y = prod[1]; z = prod[2]; #else x /= scale; y /= scale; z /= scale; #endif return; } template void Vector3::operator/=(const T scale) { x /= scale; y /= scale; z /= scale; return; } // Good, optimized chad version for doubles template<> Vector3 Vector3::operator*(const Matrix4x4& mat) const { Vector3 newVec; #ifndef _EULE_NO_INTRINSICS_ // Store x, y, and z values __m256d __vecx = _mm256_set1_pd(x); __m256d __vecy = _mm256_set1_pd(y); __m256d __vecz = _mm256_set1_pd(z); // Store matrix values __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); // Multiply x, y, z and matrix values __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); // Sum up the products __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); // Store translation values __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); // Add the translation values __m256d __final = _mm256_add_pd(__sum, __translation); double dfinal[4]; _mm256_storeu_pd(dfinal, __final); newVec.x = dfinal[3]; newVec.y = dfinal[2]; newVec.z = dfinal[1]; #else // Rotation, Scaling newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); // Translation newVec.x += mat[0][3]; newVec.y += mat[1][3]; newVec.z += mat[2][3]; #endif return newVec; } // Slow, lame version for intcels template<> Vector3 Vector3::operator*(const Matrix4x4& mat) const { Vector3 newVec; // Rotation, Scaling newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z); newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z); newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z); // Translation newVec.x += mat[0][3]; newVec.y += mat[1][3]; newVec.z += mat[2][3]; return Vector3( (int)newVec.x, (int)newVec.y, (int)newVec.z ); } // Good, optimized chad version for doubles template<> void Vector3::operator*=(const Matrix4x4& mat) { #ifndef _EULE_NO_INTRINSICS_ // Store x, y, and z values __m256d __vecx = _mm256_set1_pd(x); __m256d __vecy = _mm256_set1_pd(y); __m256d __vecz = _mm256_set1_pd(z); // Store matrix values __m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0); __m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0); __m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0); // Multiply x, y, z and matrix values __m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0); __m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1); __m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2); // Sum up the products __m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2)); // Store translation values __m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0); // Add the translation values __m256d __final = _mm256_add_pd(__sum, __translation); double dfinal[4]; _mm256_storeu_pd(dfinal, __final); x = dfinal[3]; y = dfinal[2]; z = dfinal[1]; #else Vector3 buffer = *this; x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z); y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z); z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z); // Translation x += mat[0][3]; y += mat[1][3]; z += mat[2][3]; #endif return; } template Vector3 Vector3::operator-() const { return Vector3( -x, -y, -z ); } template void Vector3::operator=(const Vector3& other) { x = other.x; y = other.y; z = other.z; return; } template void Vector3::operator=(Vector3&& other) noexcept { x = std::move(other.x); y = std::move(other.y); z = std::move(other.z); return; } // Slow, lame version for intcels template<> void Vector3::operator*=(const Matrix4x4& mat) { Vector3 buffer(x, y, z); x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z)); y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z)); z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z)); // Translation x += (int)mat[0][3]; y += (int)mat[1][3]; z += (int)mat[2][3]; return; } template bool Vector3::operator==(const Vector3& other) const { return (x == other.x) && (y == other.y) && (z == other.z); } template bool Vector3::operator!=(const Vector3& other) const { return !operator==(other); } template class Vector3; template class Vector3; // Some handy predefines template const Vector3 Vector3::up(0, 1, 0); template const Vector3 Vector3::down(0, -1, 0); template const Vector3 Vector3::right(1, 0, 0); template const Vector3 Vector3::left(-1, 0, 0); template const Vector3 Vector3