Made Eule compile Vector2 and Vector4 classes

This commit is contained in:
Leonetienne 2022-02-11 14:16:02 +01:00
parent 0851bc4fd0
commit 0538fce475
4 changed files with 1624 additions and 1601 deletions

View File

@ -2,14 +2,719 @@
#include "Vector3.h"
#include "Vector4.h"
template<typename T>
Eule::Vector2<T>::operator Eule::Vector3<T>() const
{
return Vector3<T>(x, y, 0);
}
#include "Math.h"
#include <iostream>
template<typename T>
Eule::Vector2<T>::operator Eule::Vector4<T>() const
{
return Vector4<T>(x, y, 0, 0);
//#define _EULE_NO_INTRINSICS_
#ifndef _EULE_NO_INTRINSICS_
#include <immintrin.h>
#endif
/*
NOTE:
Here you will find bad, unoptimized methods for T=int.
This is because the compiler needs a method for each type in each instantiation of the template!
I can't generalize the methods when heavily optimizing for doubles.
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus.
*/
namespace Eule {
//! Conversion to Vector3<T>: x and y are carried over, the third component is 0.
template<typename T>
Vector2<T>::operator Vector3<T>() const
{
    return Vector3<T>(this->x, this->y, 0);
}

//! Conversion to Vector4<T>: x and y are carried over, the remaining two components are 0.
template<typename T>
Vector2<T>::operator Vector4<T>() const
{
    return Vector4<T>(this->x, this->y, 0, 0);
}
//! Dot product for T=double.
//! Both code paths work in double precision; nothing is narrowed to float on the way.
template<>
double Vector2<double>::DotProduct(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 128-bit registers: lane 0 holds x, lane 1 holds y
    const __m128d self_xy = _mm_set_pd(y, x);
    const __m128d other_xy = _mm_set_pd(other.y, other.x);

    // Lane-wise products: { x * other.x, y * other.y }
    const __m128d products = _mm_mul_pd(self_xy, other_xy);

    // Bring lane 1 down to lane 0, add it to lane 0 and return that lane
    const __m128d upper = _mm_unpackhi_pd(products, products);
    return _mm_cvtsd_f64(_mm_add_sd(products, upper));
#else
    return (x * other.x) +
        (y * other.y);
#endif
}

//! Dot product for T=int.
//! The products are formed in long long, so they cannot overflow int;
//! the sum is accumulated in double, which is also the return type.
template<>
double Vector2<int>::DotProduct(const Vector2<int>& other) const
{
    const long long xx = static_cast<long long>(x) * other.x;
    const long long yy = static_cast<long long>(y) * other.y;
    return static_cast<double>(xx) + static_cast<double>(yy);
}
//! 2d cross product for T=double: x * other.y - y * other.x.
template<>
double Vector2<double>::CrossProduct(const Vector2<double>& other) const
{
    return (x * other.y) -
        (y * other.x);
}

//! 2d cross product for T=int.
//! The products are formed in long long, so they cannot overflow int;
//! the difference is taken in double, which is also the return type.
template<>
double Vector2<int>::CrossProduct(const Vector2<int>& other) const
{
    const long long xy = static_cast<long long>(x) * other.y;
    const long long yx = static_cast<long long>(y) * other.x;
    return static_cast<double>(xy) - static_cast<double>(yx);
}
//! Squared magnitude for T=double.
template<>
double Vector2<double>::SqrMagnitude() const
{
    // The dot product of a vector with itself is its squared magnitude
    return DotProduct(*this);
}

//! Squared magnitude for T=int.
//! The squares are formed in long long, so they cannot overflow int;
//! the sum is accumulated in double, which is also the return type.
template<>
double Vector2<int>::SqrMagnitude() const
{
    const long long xx = static_cast<long long>(x) * x;
    const long long yy = static_cast<long long>(y) * y;
    return static_cast<double>(xx) + static_cast<double>(yy);
}
//! Magnitude: the square root of SqrMagnitude().
template<typename T>
double Vector2<T>::Magnitude() const
{
    const double squared = SqrMagnitude();
    return sqrt(squared);
}
//! Component-wise product for T=double: (x * scalar.x, y * scalar.y).
template<>
Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, scalar.y, scalar.x);

    // Multiply lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x * scalar.x, y * scalar.y);
#endif
}

//! Component-wise product for T=int: (x * scalar.x, y * scalar.y).
template<>
Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
{
    const int scaled_x = x * scalar.x;
    const int scaled_y = y * scalar.y;
    return Vector2<int>(scaled_x, scaled_y);
}
//! Returns a normalized Vector2<double> built from this vector's components.
//! This vector itself is left untouched.
template<typename T>
Vector2<double> Vector2<T>::Normalize() const
{
    Vector2<double> unit(x, y);
    unit.NormalizeSelf();
    return unit;
}
//! Normalizes a Vector2d in place by dividing both components by Magnitude().
//! A vector of magnitude 0 is set to (0, 0).
template<>
void Vector2<double>::NormalizeSelf()
{
    const double len = Magnitude();

    // Guard against dividing by 0
    if (len == 0)
    {
        x = 0;
        y = 0;
        return;
    }

#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0], divided lane-wise by the length
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(len);

    // Copy the quotients back and apply the two lowest lanes
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
#else
    x /= len;
    y /= len;
#endif
}

//! T=int version. An int vector cannot be normalized, but the
//! instantiation needs an implementation: it sets both components to 0.
template<>
void Vector2<int>::NormalizeSelf()
{
    x = 0;
    y = 0;
}
//! Lerps this Vector2d towards other in place: self * (1 - t) + other * t.
template<>
void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
{
    const double inv_t = 1.0 - t; // weight of the current value

#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0] for both vectors; the weights are broadcast to all lanes
    const __m256d from = _mm256_set_pd(0, 0, y, x);
    const __m256d to = _mm256_set_pd(0, 0, other.y, other.x);
    const __m256d weight_to = _mm256_set1_pd(t);
    const __m256d weight_from = _mm256_set1_pd(inv_t);

    // Accumulate (from * weight_from) + (to * weight_to) with two fused multiply-adds
    __m256d acc = _mm256_set1_pd(0);
    acc = _mm256_fmadd_pd(from, weight_from, acc);
    acc = _mm256_fmadd_pd(to, weight_to, acc);

    // Copy the lanes back and apply the two lowest ones
    double lanes[4];
    _mm256_storeu_pd(lanes, acc);
    x = lanes[0];
    y = lanes[1];
#else
    x = inv_t * x + t * other.x;
    y = inv_t * y + t * other.y;
#endif
}

//! Lerps this Vector2i towards other in place.
//! The interpolation is computed in double and converted back to int.
template<>
void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
{
    const double inv_t = 1.0 - t; // weight of the current value

    x = static_cast<int>(inv_t * static_cast<double>(x) + t * static_cast<double>(other.x));
    y = static_cast<int>(inv_t * static_cast<double>(y) + t * static_cast<double>(other.y));
}
//! Returns the lerp between this Vector2d and other by t; this vector is not modified.
template<>
Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
{
    Vector2d blended(*this);
    blended.LerpSelf(other, t);
    return blended;
}

//! Returns the lerp between this Vector2i and other by t.
//! Both vectors are converted with ToDouble() first, so the result is a Vector2d.
template<>
Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
{
    Vector2d blended(this->ToDouble());
    blended.LerpSelf(other.ToDouble(), t);
    return blended;
}
//! Mutable component access: index 0 is x, index 1 is y.
//! Throws std::out_of_range for any other index.
template<typename T>
T& Vector2<T>::operator[](std::size_t idx)
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;

    throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
}

//! Read-only component access: index 0 is x, index 1 is y.
//! Throws std::out_of_range for any other index.
template<typename T>
const T& Vector2<T>::operator[](std::size_t idx) const
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;

    throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
}
//! True if both components pass ::Eule::Math::Similar against other's with the given epsilon.
//! y is only compared when x already passed.
template<typename T>
bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
{
    if (!(::Eule::Math::Similar(x, other.x, epsilon)))
        return false;

    return (::Eule::Math::Similar(y, other.y, epsilon));
}
//! Returns a Vector2<int> with both components converted to int.
template<typename T>
Vector2<int> Vector2<T>::ToInt() const
{
    return Vector2<int>(static_cast<int>(x), static_cast<int>(y));
}

//! Returns a Vector2<double> with both components converted to double.
template<typename T>
Vector2<double> Vector2<T>::ToDouble() const
{
    return Vector2<double>(static_cast<double>(x), static_cast<double>(y));
}
//! Component-wise sum, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Add lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x + other.x, y + other.y);
#endif
}

//! Component-wise sum, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
{
    const T sum_x = x + other.x;
    const T sum_y = y + other.y;
    return Vector2<T>(sum_x, sum_y);
}
//! In-place component-wise sum, specialized for T=double.
template<>
void Vector2<double>::operator+=(const Vector2<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Add lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
#else
    x += other.x;
    y += other.y;
#endif
}

//! In-place component-wise sum, generic version.
template<typename T>
void Vector2<T>::operator+=(const Vector2<T>& other)
{
    this->x += other.x;
    this->y += other.y;
}
//! Component-wise difference, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Subtract lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x - other.x, y - other.y);
#endif
}

//! Component-wise difference, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
{
    const T diff_x = x - other.x;
    const T diff_y = y - other.y;
    return Vector2<T>(diff_x, diff_y);
}
//! In-place component-wise difference, specialized for T=double.
template<>
void Vector2<double>::operator-=(const Vector2<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Subtract lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
#else
    x -= other.x;
    y -= other.y;
#endif
}

//! In-place component-wise difference, generic version.
template<typename T>
void Vector2<T>::operator-=(const Vector2<T>& other)
{
    this->x -= other.x;
    this->y -= other.y;
}
//! Multiplies both components by scale, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator*(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the factor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Multiply lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x * scale, y * scale);
#endif
}

//! Multiplies both components by scale, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator*(const T scale) const
{
    const T scaled_x = x * scale;
    const T scaled_y = y * scale;
    return Vector2<T>(scaled_x, scaled_y);
}
//! Multiplies both components by scale in place, specialized for T=double.
template<>
void Vector2<double>::operator*=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the factor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Multiply lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
    x = lanes[0];
    y = lanes[1];
#else
    x *= scale;
    y *= scale;
#endif
}

//! Multiplies both components by scale in place, generic version.
template<typename T>
void Vector2<T>::operator*=(const T scale)
{
    this->x *= scale;
    this->y *= scale;
}
//! Divides both components by scale, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator/(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the divisor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Divide lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x / scale, y / scale);
#endif
}

//! Divides both components by scale, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator/(const T scale) const
{
    const T quot_x = x / scale;
    const T quot_y = y / scale;
    return Vector2<T>(quot_x, quot_y);
}
//! Divides both components by scale in place, specialized for T=double.
template<>
void Vector2<double>::operator/=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the divisor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Divide lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
#else
    x /= scale;
    y /= scale;
#endif
}

//! Divides both components by scale in place, generic version.
template<typename T>
void Vector2<T>::operator/=(const T scale)
{
    this->x /= scale;
    this->y /= scale;
}
//! Copy assignment: takes over both components of other.
template<typename T>
void Vector2<T>::operator=(const Vector2<T>& other)
{
    this->x = other.x;
    this->y = other.y;
}

//! Move assignment: moves both components out of other.
template<typename T>
void Vector2<T>::operator=(Vector2<T>&& other) noexcept
{
    this->x = std::move(other.x);
    this->y = std::move(other.y);
}
//! Exact equality: both components must compare equal with ==.
template<typename T>
bool Vector2<T>::operator==(const Vector2<T>& other) const
{
    if (!(x == other.x))
        return false;

    return (y == other.y);
}

//! Negation of operator==.
template<typename T>
bool Vector2<T>::operator!=(const Vector2<T>& other) const
{
    const bool equal = operator==(other);
    return !equal;
}
//! Unary minus: returns a vector with both components negated.
template<typename T>
Vector2<T> Vector2<T>::operator-() const
{
    return Vector2<T>(-(this->x), -(this->y));
}
template class Vector2<int>;
template class Vector2<double>;
// Some handy predefines
template <typename T>
const Vector2<double> Vector2<T>::up(0, 1);
template <typename T>
const Vector2<double> Vector2<T>::down(0, -1);
template <typename T>
const Vector2<double> Vector2<T>::right(1, 0);
template <typename T>
const Vector2<double> Vector2<T>::left(-1, 0);
template <typename T>
const Vector2<double> Vector2<T>::one(1, 1);
template <typename T>
const Vector2<double> Vector2<T>::zero(0, 0);
}

View File

@ -1,804 +1,118 @@
#pragma once
#include <cstdlib>
#include <sstream>
#include "Math.h"
#include <iostream>
//#define _EULE_NO_INTRINSICS_
#ifndef _EULE_NO_INTRINSICS_
#include <immintrin.h>
#endif
namespace Eule {
template<typename T>
class Vector3;
/*
NOTE:
Here you will find bad, unoptimized methods for T=int.
This is because the compiler needs a method for each type in each instantiation of the template!
I can't generalize the methods when heavily optimizing for doubles.
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus.
*/
template<typename T>
class Vector4;
namespace Eule
{
template <typename T> class Vector3;
template <typename T> class Vector4;
/** Representation of a 2d vector.
* Contains a lot of utility methods.
*/
template<typename T>
class Vector2 {
public:
Vector2() : x{0}, y{0} {}
/** Representation of a 2d vector.
* Contains a lot of utility methods.
*/
template <typename T>
class Vector2
{
public:
Vector2() : x{ 0 }, y{ 0 } {}
Vector2(T _x, T _y) : x{ _x }, y{ _y } {}
Vector2(const Vector2<T>& other) = default;
Vector2(Vector2<T>&& other) noexcept = default;
Vector2(T _x, T _y) : x{_x}, y{_y} {}
//! Will compute the dot product to another Vector2
double DotProduct(const Vector2<T>& other) const;
Vector2(const Vector2<T> &other) = default;
//! Will compute the cross product to another Vector2
double CrossProduct(const Vector2<T>& other) const;
Vector2(Vector2<T> &&other) noexcept = default;
//! Will compute the square magnitude
double SqrMagnitude() const;
//! Will compute the dot product to another Vector2
double DotProduct(const Vector2<T> &other) const;
//! Will compute the magnitude
double Magnitude() const;
//! Will compute the cross product to another Vector2
double CrossProduct(const Vector2<T> &other) const;
//! Will return the normalization of this vector
[[nodiscard]] Vector2<double> Normalize() const;
//! Will compute the square magnitude
double SqrMagnitude() const;
//! Will normalize this vector
void NormalizeSelf();
//! Will compute the magnitude
double Magnitude() const;
//! Will scale self.n by scalar.n
Vector2<T> VectorScale(const Vector2<T>& scalar) const;
//! Will return the normalization of this vector
[[nodiscard]] Vector2<double> Normalize() const;
//! Will lerp itself towards other by t
void LerpSelf(const Vector2<T>& other, double t);
//! Will normalize this vector
void NormalizeSelf();
//! Will return a lerp result between this and another vector
[[nodiscard]] Vector2<double> Lerp(const Vector2<T>& other, double t) const;
//! Will scale self.n by scalar.n
Vector2<T> VectorScale(const Vector2<T> &scalar) const;
//! Will compare if two vectors are similar to a certain epsilon value
[[nodiscard]] bool Similar(const Vector2<T>& other, double epsilon = 0.00001) const;
//! Will lerp itself towards other by t
void LerpSelf(const Vector2<T> &other, double t);
//! Will convert this vector to a Vector2i
[[nodiscard]] Vector2<int> ToInt() const;
//! Will return a lerp result between this and another vector
[[nodiscard]] Vector2<double> Lerp(const Vector2<T> &other, double t) const;
//! Will convert this vector to a Vector2d
[[nodiscard]] Vector2<double> ToDouble() const;
//! Will compare if two vectors are similar to a certain epsilon value
[[nodiscard]] bool Similar(const Vector2<T> &other, double epsilon = 0.00001) const;
T& operator[](std::size_t idx);
const T& operator[](std::size_t idx) const;
//! Will convert this vector to a Vector2i
[[nodiscard]] Vector2<int> ToInt() const;
Vector2<T> operator+(const Vector2<T>& other) const;
void operator+=(const Vector2<T>& other);
Vector2<T> operator-(const Vector2<T>& other) const;
void operator-=(const Vector2<T>& other);
Vector2<T> operator*(const T scale) const;
void operator*=(const T scale);
Vector2<T> operator/(const T scale) const;
void operator/=(const T scale);
Vector2<T> operator-() const;
//! Will convert this vector to a Vector2d
[[nodiscard]] Vector2<double> ToDouble() const;
operator Vector3<T>() const; //! Conversion method
operator Vector4<T>() const; //! Conversion method
T &operator[](std::size_t idx);
void operator=(const Vector2<T>& other);
void operator=(Vector2<T>&& other) noexcept;
const T &operator[](std::size_t idx) const;
bool operator==(const Vector2<T>& other) const;
bool operator!=(const Vector2<T>& other) const;
Vector2<T> operator+(const Vector2<T> &other) const;
friend std::ostream& operator<< (std::ostream& os, const Vector2<T>& v)
{
return os << "[x: " << v.x << " y: " << v.y << "]";
}
friend std::wostream& operator<< (std::wostream& os, const Vector2<T>& v)
{
return os << L"[x: " << v.x << L" y: " << v.y << L"]";
}
void operator+=(const Vector2<T> &other);
T x;
T y;
Vector2<T> operator-(const Vector2<T> &other) const;
// Some handy predefines
static const Vector2<double> up;
static const Vector2<double> down;
static const Vector2<double> right;
static const Vector2<double> left;
static const Vector2<double> one;
static const Vector2<double> zero;
};
void operator-=(const Vector2<T> &other);
typedef Vector2<int> Vector2i;
typedef Vector2<double> Vector2d;
Vector2<T> operator*(const T scale) const;
// Good, optimized chad version for doubles
template<>
double Vector2<double>::DotProduct(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
void operator*=(const T scale);
// Move vector components into registers
__m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x);
__m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x);
Vector2<T> operator/(const T scale) const;
// Define bitmask, and execute computation
const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot
__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
void operator/=(const T scale);
// Retrieve result, and return it
float result[8];
_mm256_storeu_ps(result, __dot);
Vector2<T> operator-() const;
return result[0];
operator Vector3<T>() const; //! Conversion method
operator Vector4<T>() const; //! Conversion method
void operator=(const Vector2<T> &other);
void operator=(Vector2<T> &&other) noexcept;
bool operator==(const Vector2<T> &other) const;
bool operator!=(const Vector2<T> &other) const;
friend std::ostream &operator<<(std::ostream &os, const Vector2<T> &v) {
return os << "[x: " << v.x << " y: " << v.y << "]";
}
friend std::wostream &operator<<(std::wostream &os, const Vector2<T> &v) {
return os << L"[x: " << v.x << L" y: " << v.y << L"]";
}
T x;
T y;
// Some handy predefines
static const Vector2<double> up;
static const Vector2<double> down;
static const Vector2<double> right;
static const Vector2<double> left;
static const Vector2<double> one;
static const Vector2<double> zero;
};
typedef Vector2<int> Vector2i;
typedef Vector2<double> Vector2d;
#else
return (x * other.x) +
(y * other.y);
#endif
}
//! Dot product for T=int.
//! The products are formed in long long, so they cannot overflow int;
//! the sum is accumulated in double, which is also the return type.
template<>
double Vector2<int>::DotProduct(const Vector2<int>& other) const
{
    const long long xx = static_cast<long long>(x) * other.x;
    const long long yy = static_cast<long long>(y) * other.y;
    return static_cast<double>(xx) + static_cast<double>(yy);
}
//! 2d cross product for T=double: x * other.y - y * other.x.
template<>
double Vector2<double>::CrossProduct(const Vector2<double>& other) const
{
    return (x * other.y) -
        (y * other.x);
}

//! 2d cross product for T=int.
//! The products are formed in long long, so they cannot overflow int;
//! the difference is taken in double, which is also the return type.
template<>
double Vector2<int>::CrossProduct(const Vector2<int>& other) const
{
    const long long xy = static_cast<long long>(x) * other.y;
    const long long yx = static_cast<long long>(y) * other.x;
    return static_cast<double>(xy) - static_cast<double>(yx);
}
//! Squared magnitude for T=double.
template<>
double Vector2<double>::SqrMagnitude() const
{
    // The dot product of a vector with itself is its squared magnitude
    return DotProduct(*this);
}

//! Squared magnitude for T=int.
//! The squares are formed in long long, so they cannot overflow int;
//! the sum is accumulated in double, which is also the return type.
template<>
double Vector2<int>::SqrMagnitude() const
{
    const long long xx = static_cast<long long>(x) * x;
    const long long yy = static_cast<long long>(y) * y;
    return static_cast<double>(xx) + static_cast<double>(yy);
}
//! Magnitude: the square root of SqrMagnitude().
template<typename T>
double Vector2<T>::Magnitude() const
{
    const double squared = SqrMagnitude();
    return sqrt(squared);
}
//! Component-wise product for T=double: (x * scalar.x, y * scalar.y).
template<>
Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, scalar.y, scalar.x);

    // Multiply lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x * scalar.x, y * scalar.y);
#endif
}

//! Component-wise product for T=int: (x * scalar.x, y * scalar.y).
template<>
Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
{
    const int scaled_x = x * scalar.x;
    const int scaled_y = y * scalar.y;
    return Vector2<int>(scaled_x, scaled_y);
}
//! Returns a normalized Vector2<double> built from this vector's components.
//! This vector itself is left untouched.
template<typename T>
Vector2<double> Vector2<T>::Normalize() const
{
    Vector2<double> unit(x, y);
    unit.NormalizeSelf();
    return unit;
}
//! Normalizes a Vector2d in place by dividing both components by Magnitude().
//! A vector of magnitude 0 is set to (0, 0).
template<>
void Vector2<double>::NormalizeSelf()
{
    const double len = Magnitude();

    // Guard against dividing by 0
    if (len == 0)
    {
        x = 0;
        y = 0;
        return;
    }

#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0], divided lane-wise by the length
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(len);

    // Copy the quotients back and apply the two lowest lanes
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
#else
    x /= len;
    y /= len;
#endif
}

//! T=int version. An int vector cannot be normalized, but the
//! instantiation needs an implementation: it sets both components to 0.
template<>
void Vector2<int>::NormalizeSelf()
{
    x = 0;
    y = 0;
}
//! Lerps this Vector2d towards other in place: self * (1 - t) + other * t.
template<>
void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
{
    const double inv_t = 1.0 - t; // weight of the current value

#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0] for both vectors; the weights are broadcast to all lanes
    const __m256d from = _mm256_set_pd(0, 0, y, x);
    const __m256d to = _mm256_set_pd(0, 0, other.y, other.x);
    const __m256d weight_to = _mm256_set1_pd(t);
    const __m256d weight_from = _mm256_set1_pd(inv_t);

    // Accumulate (from * weight_from) + (to * weight_to) with two fused multiply-adds
    __m256d acc = _mm256_set1_pd(0);
    acc = _mm256_fmadd_pd(from, weight_from, acc);
    acc = _mm256_fmadd_pd(to, weight_to, acc);

    // Copy the lanes back and apply the two lowest ones
    double lanes[4];
    _mm256_storeu_pd(lanes, acc);
    x = lanes[0];
    y = lanes[1];
#else
    x = inv_t * x + t * other.x;
    y = inv_t * y + t * other.y;
#endif
}

//! Lerps this Vector2i towards other in place.
//! The interpolation is computed in double and converted back to int.
template<>
void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
{
    const double inv_t = 1.0 - t; // weight of the current value

    x = static_cast<int>(inv_t * static_cast<double>(x) + t * static_cast<double>(other.x));
    y = static_cast<int>(inv_t * static_cast<double>(y) + t * static_cast<double>(other.y));
}
//! Returns the lerp between this Vector2d and other by t; this vector is not modified.
template<>
Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
{
    Vector2d blended(*this);
    blended.LerpSelf(other, t);
    return blended;
}

//! Returns the lerp between this Vector2i and other by t.
//! Both vectors are converted with ToDouble() first, so the result is a Vector2d.
template<>
Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
{
    Vector2d blended(this->ToDouble());
    blended.LerpSelf(other.ToDouble(), t);
    return blended;
}
//! Mutable component access: index 0 is x, index 1 is y.
//! Throws std::out_of_range for any other index.
template<typename T>
T& Vector2<T>::operator[](std::size_t idx)
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;

    throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
}

//! Read-only component access: index 0 is x, index 1 is y.
//! Throws std::out_of_range for any other index.
template<typename T>
const T& Vector2<T>::operator[](std::size_t idx) const
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;

    throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
}
//! True if both components pass ::Eule::Math::Similar against other's with the given epsilon.
//! y is only compared when x already passed.
template<typename T>
bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
{
    if (!(::Eule::Math::Similar(x, other.x, epsilon)))
        return false;

    return (::Eule::Math::Similar(y, other.y, epsilon));
}
//! Returns a Vector2<int> with both components converted to int.
template<typename T>
Vector2<int> Vector2<T>::ToInt() const
{
    return Vector2<int>(static_cast<int>(x), static_cast<int>(y));
}

//! Returns a Vector2<double> with both components converted to double.
template<typename T>
Vector2<double> Vector2<T>::ToDouble() const
{
    return Vector2<double>(static_cast<double>(x), static_cast<double>(y));
}
//! Component-wise sum, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Add lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x + other.x, y + other.y);
#endif
}

//! Component-wise sum, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
{
    const T sum_x = x + other.x;
    const T sum_y = y + other.y;
    return Vector2<T>(sum_x, sum_y);
}
//! In-place component-wise sum, specialized for T=double.
template<>
void Vector2<double>::operator+=(const Vector2<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Add lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
#else
    x += other.x;
    y += other.y;
#endif
}

//! In-place component-wise sum, generic version.
template<typename T>
void Vector2<T>::operator+=(const Vector2<T>& other)
{
    this->x += other.x;
    this->y += other.y;
}
//! Component-wise difference, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Subtract lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x - other.x, y - other.y);
#endif
}

//! Component-wise difference, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
{
    const T diff_x = x - other.x;
    const T diff_y = y - other.y;
    return Vector2<T>(diff_x, diff_y);
}
//! In-place component-wise difference, specialized for T=double.
template<>
void Vector2<double>::operator-=(const Vector2<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; only the two lowest lanes are read back
    const __m256d lhs = _mm256_set_pd(0, 0, y, x);
    const __m256d rhs = _mm256_set_pd(0, 0, other.y, other.x);

    // Subtract lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
#else
    x -= other.x;
    y -= other.y;
#endif
}

//! In-place component-wise difference, generic version.
template<typename T>
void Vector2<T>::operator-=(const Vector2<T>& other)
{
    this->x -= other.x;
    this->y -= other.y;
}
//! Multiplies both components by scale, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator*(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the factor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Multiply lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x * scale, y * scale);
#endif
}

//! Multiplies both components by scale, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator*(const T scale) const
{
    const T scaled_x = x * scale;
    const T scaled_y = y * scale;
    return Vector2<T>(scaled_x, scaled_y);
}
//! Multiplies both components by scale in place, specialized for T=double.
template<>
void Vector2<double>::operator*=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the factor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Multiply lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
    x = lanes[0];
    y = lanes[1];
#else
    x *= scale;
    y *= scale;
#endif
}

//! Multiplies both components by scale in place, generic version.
template<typename T>
void Vector2<T>::operator*=(const T scale)
{
    this->x *= scale;
    this->y *= scale;
}
//! Divides both components by scale, specialized for T=double.
template<>
Vector2<double> Vector2<double>::operator/(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the divisor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Divide lane-wise and copy the lanes back to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));

    return Vector2<double>(lanes[0], lanes[1]);
#else
    return Vector2<double>(x / scale, y / scale);
#endif
}

//! Divides both components by scale, generic version.
template<typename T>
Vector2<T> Vector2<T>::operator/(const T scale) const
{
    const T quot_x = x / scale;
    const T quot_y = y / scale;
    return Vector2<T>(quot_x, quot_y);
}
//! Divides both components by scale in place, specialized for T=double.
template<>
void Vector2<double>::operator/=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Register lanes: [x, y, 0, 0]; the divisor is broadcast to all lanes
    const __m256d components = _mm256_set_pd(0, 0, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Divide lane-wise, copy the lanes back and apply them
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
#else
    x /= scale;
    y /= scale;
#endif
}

//! Divides both components by scale in place, generic version.
template<typename T>
void Vector2<T>::operator/=(const T scale)
{
    this->x /= scale;
    this->y /= scale;
}
//! Copy assignment: takes over both components of other.
template<typename T>
void Vector2<T>::operator=(const Vector2<T>& other)
{
    this->x = other.x;
    this->y = other.y;
}

//! Move assignment: moves both components out of other.
template<typename T>
void Vector2<T>::operator=(Vector2<T>&& other) noexcept
{
    this->x = std::move(other.x);
    this->y = std::move(other.y);
}
//! Exact equality: both components must compare equal with ==.
template<typename T>
bool Vector2<T>::operator==(const Vector2<T>& other) const
{
    if (!(x == other.x))
        return false;

    return (y == other.y);
}

//! Negation of operator==.
template<typename T>
bool Vector2<T>::operator!=(const Vector2<T>& other) const
{
    const bool equal = operator==(other);
    return !equal;
}
//! Unary minus: returns a vector with both components negated.
template<typename T>
Vector2<T> Vector2<T>::operator-() const
{
    return Vector2<T>(-(this->x), -(this->y));
}
template class Vector2<int>;
template class Vector2<double>;
// Some handy predefines
template <typename T>
const Vector2<double> Vector2<T>::up(0, 1);
template <typename T>
const Vector2<double> Vector2<T>::down(0, -1);
template <typename T>
const Vector2<double> Vector2<T>::right(1, 0);
template <typename T>
const Vector2<double> Vector2<T>::left(-1, 0);
template <typename T>
const Vector2<double> Vector2<T>::one(1, 1);
template <typename T>
const Vector2<double> Vector2<T>::zero(0, 0);
}

View File

@ -2,14 +2,832 @@
#include "Vector2.h"
#include "Vector3.h"
template<typename T>
Eule::Vector4<T>::operator Eule::Vector2<T>() const
{
return Vector2<T>(x, y);
}
#include "Math.h"
#include <iostream>
template<typename T>
Eule::Vector4<T>::operator Eule::Vector3<T>() const
{
return Vector3<T>(x, y, z);
//#define _EULE_NO_INTRINSICS_
#ifndef _EULE_NO_INTRINSICS_
#include <immintrin.h>
#endif
/*
NOTE:
Here you will find bad, unoptimized methods for T=int.
This is because the compiler needs a method for each type in each instantiation of the template!
I can't generalize the methods when heavily optimizing for doubles.
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
The T=int instantiation only exists to store a tuple of four ints. Not so much as a vector in terms of vector calculus.
*/
namespace Eule {
template<typename T>
Vector4<T>::operator Vector2<T>() const
{
return Vector2<T>(x, y);
}
// Conversion to a three-component vector: keeps x, y and z, drops w.
template<typename T>
Vector4<T>::operator Vector3<T>() const
{
    Vector3<T> truncated(x, y, z);
    return truncated;
}
// Squared length of a double vector: x² + y² + z² + w² (no square root is taken).
template<>
double Vector4<double>::SqrMagnitude() const
{
    // Kept as one expression so the summation order matches x, y, z, w
    const double squaredLength = x * x + y * y + z * z + w * w;
    return squaredLength;
}
// Squared length of an int vector, returned as double.
// The components are widened to double BEFORE they are multiplied: squaring and summing
// in int is signed overflow (undefined behaviour) as soon as a component exceeds
// roughly 46340 in magnitude. For values that did not overflow before, the result is unchanged.
template<>
double Vector4<int>::SqrMagnitude() const
{
    const double dx = static_cast<double>(x);
    const double dy = static_cast<double>(y);
    const double dz = static_cast<double>(z);
    const double dw = static_cast<double>(w);

    return (dx * dx) +
           (dy * dy) +
           (dz * dz) +
           (dw * dw);
}
// Length of the vector: the square root of SqrMagnitude().
template<typename T>
double Vector4<T>::Magnitude() const
{
    const double squaredLength = SqrMagnitude();
    return sqrt(squaredLength);
}
// Component-wise product of this vector with `scalar` (x*x', y*y', z*z', w*w').
template<>
Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);

    // One packed multiply, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x * scalar.x, y * scalar.y, z * scalar.z, w * scalar.w);
#endif
}
// Component-wise product of two int vectors.
template<>
Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
{
    const int scaledX = x * scalar.x;
    const int scaledY = y * scalar.y;
    const int scaledZ = z * scalar.z;
    const int scaledW = w * scalar.w;
    return Vector4<int>(scaledX, scaledY, scaledZ, scaledW);
}
// Returns a normalized double-precision copy; this vector itself is left untouched.
template<typename T>
Vector4<double> Vector4<T>::Normalize() const
{
    // Copy the components into a double vector and normalize that copy in place
    Vector4<double> unit(x, y, z, w);
    unit.NormalizeSelf();
    return unit;
}
// Normalizes this Vector4d in place by dividing every component by the vector's length.
template<>
void Vector4<double>::NormalizeSelf()
{
    const double len = Magnitude();

    // A zero-length vector cannot be scaled: set it to zero instead of dividing by 0
    if (len == 0)
    {
        x = 0;
        y = 0;
        z = 0;
        w = 0;
        return;
    }

#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the length broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(len);

    // Packed division, then write the quotients back
    double quotient[4];
    _mm256_storeu_pd(quotient, _mm256_div_pd(components, divisor));
    x = quotient[0];
    y = quotient[1];
    z = quotient[2];
    w = quotient[3];
#else
    x /= len;
    y /= len;
    z /= len;
    w /= len;
#endif
}
// An int vector cannot be normalized meaningfully, but the T=int instantiation
// still needs a definition: this one simply zeroes every component.
template<>
void Vector4<int>::NormalizeSelf()
{
    x = y = z = w = 0;
}
// True when every component pair passes ::Eule::Math::Similar with the given epsilon.
template<typename T>
bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
{
    // Checked in x, y, z, w order; stops at the first component that is not similar
    if (!::Eule::Math::Similar(x, other.x, epsilon))
        return false;
    if (!::Eule::Math::Similar(y, other.y, epsilon))
        return false;
    if (!::Eule::Math::Similar(z, other.z, epsilon))
        return false;

    return ::Eule::Math::Similar(w, other.w, epsilon);
}
// Returns a copy of this vector with every component cast to int.
template<typename T>
Vector4<int> Vector4<T>::ToInt() const
{
    const int ix = static_cast<int>(x);
    const int iy = static_cast<int>(y);
    const int iz = static_cast<int>(z);
    const int iw = static_cast<int>(w);
    return Vector4<int>(ix, iy, iz, iw);
}
// Returns a copy of this vector with every component cast to double.
template<typename T>
Vector4<double> Vector4<T>::ToDouble() const
{
    const double dx = static_cast<double>(x);
    const double dy = static_cast<double>(y);
    const double dz = static_cast<double>(z);
    const double dw = static_cast<double>(w);
    return Vector4<double>(dx, dy, dz, dw);
}
// Mutable index access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Throws std::out_of_range for any other index.
template<typename T>
T& Vector4<T>::operator[](std::size_t idx)
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;
    if (idx == 2)
        return z;
    if (idx == 3)
        return w;

    throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
}
// Read-only index access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Throws std::out_of_range for any other index.
template<typename T>
const T& Vector4<T>::operator[](std::size_t idx) const
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;
    if (idx == 2)
        return z;
    if (idx == 3)
        return w;

    throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
}
// In-place linear interpolation towards `other`: this = (1 - t) * this + t * other.
template<>
void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
{
    const double oneMinusT = 1.0 - t; // weight of the current value

#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w; both weights broadcast to every lane
    const __m256d self = _mm256_set_pd(w, z, y, x);
    const __m256d target = _mm256_set_pd(other.w, other.z, other.y, other.x);
    const __m256d weightTarget = _mm256_set1_pd(t);
    const __m256d weightSelf = _mm256_set1_pd(oneMinusT);

    // Accumulate with two fused multiply-adds:
    //   acc = self * (1 - t) + 0
    //   acc = target * t + acc
    __m256d acc = _mm256_set1_pd(0);
    acc = _mm256_fmadd_pd(self, weightSelf, acc);
    acc = _mm256_fmadd_pd(target, weightTarget, acc);

    // Write the blended lanes back into the components
    double blended[4];
    _mm256_storeu_pd(blended, acc);
    x = blended[0];
    y = blended[1];
    z = blended[2];
    w = blended[3];
#else
    x = oneMinusT * x + t * other.x;
    y = oneMinusT * y + t * other.y;
    z = oneMinusT * z + t * other.z;
    w = oneMinusT * w + t * other.w;
#endif
}
// In-place linear interpolation for int vectors.
// Each component is blended in double precision and cast back to int.
template<>
void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
{
    const double oneMinusT = 1.0 - t;

    x = static_cast<int>(oneMinusT * static_cast<double>(x) + t * static_cast<double>(other.x));
    y = static_cast<int>(oneMinusT * static_cast<double>(y) + t * static_cast<double>(other.y));
    z = static_cast<int>(oneMinusT * static_cast<double>(z) + t * static_cast<double>(other.z));
    w = static_cast<int>(oneMinusT * static_cast<double>(w) + t * static_cast<double>(other.w));
}
// Non-mutating interpolation: copies this vector and runs LerpSelf on the copy.
template<>
Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
{
    Vector4d blended(*this);
    blended.LerpSelf(other, t);
    return blended;
}
// Non-mutating interpolation for int vectors.
// Both operands are converted with ToDouble() first, so the result is a double vector.
template<>
Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
{
    Vector4d blended(this->ToDouble());
    const Vector4d target = other.ToDouble();
    blended.LerpSelf(target, t);
    return blended;
}
// Component-wise sum of two double vectors.
template<>
Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Add all four lanes at once and spill them to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x + other.x, y + other.y, z + other.z, w + other.w);
#endif
}
// Generic component-wise sum.
template<typename T>
Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
{
    const T sumX = x + other.x;
    const T sumY = y + other.y;
    const T sumZ = z + other.z;
    const T sumW = w + other.w;
    return Vector4<T>(sumX, sumY, sumZ, sumW);
}
// Adds `other` to this double vector in place, component by component.
template<>
void Vector4<double>::operator+=(const Vector4<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Packed add, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x += other.x;
    y += other.y;
    z += other.z;
    w += other.w;
#endif
}
// Generic in-place component-wise addition.
template<typename T>
void Vector4<T>::operator+=(const Vector4<T>& other)
{
    this->x += other.x;
    this->y += other.y;
    this->z += other.z;
    this->w += other.w;
}
// Component-wise difference of two double vectors (this - other).
template<>
Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Subtract all four lanes at once and spill them to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x - other.x, y - other.y, z - other.z, w - other.w);
#endif
}
// Generic component-wise difference (this - other).
template<typename T>
Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
{
    const T diffX = x - other.x;
    const T diffY = y - other.y;
    const T diffZ = z - other.z;
    const T diffW = w - other.w;
    return Vector4<T>(diffX, diffY, diffZ, diffW);
}
// Subtracts `other` from this double vector in place, component by component.
template<>
void Vector4<double>::operator-=(const Vector4<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Packed subtract, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x -= other.x;
    y -= other.y;
    z -= other.z;
    w -= other.w;
#endif
}
// Generic in-place component-wise subtraction.
template<typename T>
void Vector4<T>::operator-=(const Vector4<T>& other)
{
    this->x -= other.x;
    this->y -= other.y;
    this->z -= other.z;
    this->w -= other.w;
}
// Returns this double vector with every component multiplied by `scale`.
template<>
Vector4<double> Vector4<double>::operator*(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the factor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Packed multiply, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x * scale, y * scale, z * scale, w * scale);
#endif
}
// Generic scalar multiplication: every component times `scale`.
template<typename T>
Vector4<T> Vector4<T>::operator*(const T scale) const
{
    const T scaledX = x * scale;
    const T scaledY = y * scale;
    const T scaledZ = z * scale;
    const T scaledW = w * scale;
    return Vector4<T>(scaledX, scaledY, scaledZ, scaledW);
}
// Multiplies every component of this double vector by `scale`, in place.
template<>
void Vector4<double>::operator*=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the factor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Packed multiply, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x *= scale;
    y *= scale;
    z *= scale;
    w *= scale;
#endif
}
// Generic in-place scalar multiplication.
template<typename T>
void Vector4<T>::operator*=(const T scale)
{
    this->x *= scale;
    this->y *= scale;
    this->z *= scale;
    this->w *= scale;
}
// Returns this double vector with every component divided by `scale`.
template<>
Vector4<double> Vector4<double>::operator/(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the divisor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Packed division, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x / scale, y / scale, z / scale, w / scale);
#endif
}
// Generic scalar division: every component divided by `scale`.
// No check for scale == 0 is performed here.
template<typename T>
Vector4<T> Vector4<T>::operator/(const T scale) const
{
    const T quotX = x / scale;
    const T quotY = y / scale;
    const T quotZ = z / scale;
    const T quotW = w / scale;
    return Vector4<T>(quotX, quotY, quotZ, quotW);
}
// Divides every component of this double vector by `scale`, in place.
template<>
void Vector4<double>::operator/=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the divisor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Packed division, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x /= scale;
    y /= scale;
    z /= scale;
    w /= scale;
#endif
}
// Generic in-place scalar division. No check for scale == 0 is performed here.
template<typename T>
void Vector4<T>::operator/=(const T scale)
{
    this->x /= scale;
    this->y /= scale;
    this->z /= scale;
    this->w /= scale;
}
// Exact component-wise equality (no epsilon tolerance is applied).
template<typename T>
bool Vector4<T>::operator==(const Vector4<T>& other) const
{
    // Bail out on the first component that differs
    if (!(x == other.x))
        return false;
    if (!(y == other.y))
        return false;
    if (!(z == other.z))
        return false;

    return w == other.w;
}
// Transforms this double vector by a 4x4 matrix and returns the result.
// Output component i is mat[i][0]*x + mat[i][1]*y + mat[i][2]*z + mat[i][3]*w.
template<>
Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
{
    Vector4<double> transformed;

    transformed.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
    transformed.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
    transformed.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
    transformed.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);

    return transformed;
}
// Transforms this int vector by a 4x4 matrix.
// The products are accumulated in a double vector and cast back to int at the end.
template<>
Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
{
    Vector4<double> transformed;

    transformed.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
    transformed.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
    transformed.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
    transformed.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);

    const int ix = (int)transformed.x;
    const int iy = (int)transformed.y;
    const int iz = (int)transformed.z;
    const int iw = (int)transformed.w;
    return Vector4<int>(ix, iy, iz, iw);
}
// Transforms this double vector by a 4x4 matrix, in place.
template<>
void Vector4<double>::operator*=(const Matrix4x4& mat)
{
    // Snapshot of the current components: every output row must read the ORIGINAL values
    const Vector4<double> before = *this;

    // Open question from the original author: should the indices be swapped
    // (mat[j][i] instead of mat[i][j])? Check this first if results look transposed.
    x = (mat[0][0] * before.x) + (mat[0][1] * before.y) + (mat[0][2] * before.z) + (mat[0][3] * before.w);
    y = (mat[1][0] * before.x) + (mat[1][1] * before.y) + (mat[1][2] * before.z) + (mat[1][3] * before.w);
    z = (mat[2][0] * before.x) + (mat[2][1] * before.y) + (mat[2][2] * before.z) + (mat[2][3] * before.w);
    w = (mat[3][0] * before.x) + (mat[3][1] * before.y) + (mat[3][2] * before.z) + (mat[3][3] * before.w);
}
// Unary minus: returns a new vector with all four components negated.
template<typename T>
Vector4<T> Vector4<T>::operator-() const
{
    const auto negatedX = -x;
    const auto negatedY = -y;
    const auto negatedZ = -z;
    const auto negatedW = -w;
    return Vector4<T>(negatedX, negatedY, negatedZ, negatedW);
}
// Copy assignment: copies all four components from `other`. Returns nothing.
template<typename T>
void Vector4<T>::operator=(const Vector4<T>& other)
{
    this->x = other.x;
    this->y = other.y;
    this->z = other.z;
    this->w = other.w;
}
// Move assignment: takes over all four components of `other`.
template<typename T>
void Vector4<T>::operator=(Vector4<T>&& other) noexcept
{
    // std::move is kept so that a non-trivial T would be moved instead of copied
    this->x = std::move(other.x);
    this->y = std::move(other.y);
    this->z = std::move(other.z);
    this->w = std::move(other.w);
}
// Transforms this int vector by a 4x4 matrix, in place.
// Each row is evaluated against a double copy of the components and cast back to int.
template<>
void Vector4<int>::operator*=(const Matrix4x4& mat)
{
    // Double-precision snapshot: every output row must read the ORIGINAL values
    const Vector4<double> before(x, y, z, w);

    // Open question from the original author: should the indices be swapped
    // (mat[j][i] instead of mat[i][j])? Check this first if results look transposed.
    x = (int)((mat[0][0] * before.x) + (mat[0][1] * before.y) + (mat[0][2] * before.z) + (mat[0][3] * before.w));
    y = (int)((mat[1][0] * before.x) + (mat[1][1] * before.y) + (mat[1][2] * before.z) + (mat[1][3] * before.w));
    z = (int)((mat[2][0] * before.x) + (mat[2][1] * before.y) + (mat[2][2] * before.z) + (mat[2][3] * before.w));
    w = (int)((mat[3][0] * before.x) + (mat[3][1] * before.y) + (mat[3][2] * before.z) + (mat[3][3] * before.w));
}
// Inequality, defined as the negation of operator==.
template<typename T>
bool Vector4<T>::operator!=(const Vector4<T>& other) const
{
    const bool isEqual = this->operator==(other);
    return !isEqual;
}
template class Vector4<int>;
template class Vector4<double>;
// Some handy predefines
template <typename T>
const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::forward(1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::backward(-1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
template <typename T>
const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
template <typename T>
const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
template <typename T>
const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
}

View File

@ -5,22 +5,6 @@
#include <sstream>
#include "Matrix4x4.h"
#include "Math.h"
#include <iostream>
//#define _EULE_NO_INTRINSICS_
#ifndef _EULE_NO_INTRINSICS_
#include <immintrin.h>
#endif
/*
NOTE:
Here you will find bad, unoptimized methods for T=int.
This is because the compiler needs a method for each type in each instantiation of the template!
I can't generalize the methods when heavily optimizing for doubles.
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
The T=int instantiation only exists to store a tuple of four ints, not so much as a vector in terms of vector calculus.
*/
namespace Eule
{
template <typename T> class Vector2;
@ -121,802 +105,4 @@ namespace Eule
typedef Vector4<int> Vector4i;
typedef Vector4<double> Vector4d;
// Squared length of a double vector: x² + y² + z² + w² (no square root is taken).
template<>
double Vector4<double>::SqrMagnitude() const
{
    // Kept as one expression so the summation order matches x, y, z, w
    const double squaredLength = x * x + y * y + z * z + w * w;
    return squaredLength;
}
// Squared length of an int vector, returned as double.
// The components are widened to double BEFORE they are multiplied: squaring and summing
// in int is signed overflow (undefined behaviour) as soon as a component exceeds
// roughly 46340 in magnitude. For values that did not overflow before, the result is unchanged.
template<>
double Vector4<int>::SqrMagnitude() const
{
    const double dx = static_cast<double>(x);
    const double dy = static_cast<double>(y);
    const double dz = static_cast<double>(z);
    const double dw = static_cast<double>(w);

    return (dx * dx) +
           (dy * dy) +
           (dz * dz) +
           (dw * dw);
}
// Length of the vector: the square root of SqrMagnitude().
template<typename T>
double Vector4<T>::Magnitude() const
{
    const double squaredLength = SqrMagnitude();
    return sqrt(squaredLength);
}
// Component-wise product of this vector with `scalar` (x*x', y*y', z*z', w*w').
template<>
Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);

    // One packed multiply, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x * scalar.x, y * scalar.y, z * scalar.z, w * scalar.w);
#endif
}
// Component-wise product of two int vectors.
template<>
Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
{
    const int scaledX = x * scalar.x;
    const int scaledY = y * scalar.y;
    const int scaledZ = z * scalar.z;
    const int scaledW = w * scalar.w;
    return Vector4<int>(scaledX, scaledY, scaledZ, scaledW);
}
// Returns a normalized double-precision copy; this vector itself is left untouched.
template<typename T>
Vector4<double> Vector4<T>::Normalize() const
{
    // Copy the components into a double vector and normalize that copy in place
    Vector4<double> unit(x, y, z, w);
    unit.NormalizeSelf();
    return unit;
}
// Normalizes this Vector4d in place by dividing every component by the vector's length.
template<>
void Vector4<double>::NormalizeSelf()
{
    const double len = Magnitude();

    // A zero-length vector cannot be scaled: set it to zero instead of dividing by 0
    if (len == 0)
    {
        x = 0;
        y = 0;
        z = 0;
        w = 0;
        return;
    }

#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the length broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(len);

    // Packed division, then write the quotients back
    double quotient[4];
    _mm256_storeu_pd(quotient, _mm256_div_pd(components, divisor));
    x = quotient[0];
    y = quotient[1];
    z = quotient[2];
    w = quotient[3];
#else
    x /= len;
    y /= len;
    z /= len;
    w /= len;
#endif
}
// An int vector cannot be normalized meaningfully, but the T=int instantiation
// still needs a definition: this one simply zeroes every component.
template<>
void Vector4<int>::NormalizeSelf()
{
    x = y = z = w = 0;
}
// True when every component pair passes ::Eule::Math::Similar with the given epsilon.
template<typename T>
bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
{
    // Checked in x, y, z, w order; stops at the first component that is not similar
    if (!::Eule::Math::Similar(x, other.x, epsilon))
        return false;
    if (!::Eule::Math::Similar(y, other.y, epsilon))
        return false;
    if (!::Eule::Math::Similar(z, other.z, epsilon))
        return false;

    return ::Eule::Math::Similar(w, other.w, epsilon);
}
// Returns a copy of this vector with every component cast to int.
template<typename T>
Vector4<int> Vector4<T>::ToInt() const
{
    const int ix = static_cast<int>(x);
    const int iy = static_cast<int>(y);
    const int iz = static_cast<int>(z);
    const int iw = static_cast<int>(w);
    return Vector4<int>(ix, iy, iz, iw);
}
// Returns a copy of this vector with every component cast to double.
template<typename T>
Vector4<double> Vector4<T>::ToDouble() const
{
    const double dx = static_cast<double>(x);
    const double dy = static_cast<double>(y);
    const double dz = static_cast<double>(z);
    const double dw = static_cast<double>(w);
    return Vector4<double>(dx, dy, dz, dw);
}
// Mutable index access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Throws std::out_of_range for any other index.
template<typename T>
T& Vector4<T>::operator[](std::size_t idx)
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;
    if (idx == 2)
        return z;
    if (idx == 3)
        return w;

    throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
}
// Read-only index access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Throws std::out_of_range for any other index.
template<typename T>
const T& Vector4<T>::operator[](std::size_t idx) const
{
    if (idx == 0)
        return x;
    if (idx == 1)
        return y;
    if (idx == 2)
        return z;
    if (idx == 3)
        return w;

    throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
}
// In-place linear interpolation towards `other`: this = (1 - t) * this + t * other.
template<>
void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
{
    const double oneMinusT = 1.0 - t; // weight of the current value

#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w; both weights broadcast to every lane
    const __m256d self = _mm256_set_pd(w, z, y, x);
    const __m256d target = _mm256_set_pd(other.w, other.z, other.y, other.x);
    const __m256d weightTarget = _mm256_set1_pd(t);
    const __m256d weightSelf = _mm256_set1_pd(oneMinusT);

    // Accumulate with two fused multiply-adds:
    //   acc = self * (1 - t) + 0
    //   acc = target * t + acc
    __m256d acc = _mm256_set1_pd(0);
    acc = _mm256_fmadd_pd(self, weightSelf, acc);
    acc = _mm256_fmadd_pd(target, weightTarget, acc);

    // Write the blended lanes back into the components
    double blended[4];
    _mm256_storeu_pd(blended, acc);
    x = blended[0];
    y = blended[1];
    z = blended[2];
    w = blended[3];
#else
    x = oneMinusT * x + t * other.x;
    y = oneMinusT * y + t * other.y;
    z = oneMinusT * z + t * other.z;
    w = oneMinusT * w + t * other.w;
#endif
}
// In-place linear interpolation for int vectors.
// Each component is blended in double precision and cast back to int.
template<>
void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
{
    const double oneMinusT = 1.0 - t;

    x = static_cast<int>(oneMinusT * static_cast<double>(x) + t * static_cast<double>(other.x));
    y = static_cast<int>(oneMinusT * static_cast<double>(y) + t * static_cast<double>(other.y));
    z = static_cast<int>(oneMinusT * static_cast<double>(z) + t * static_cast<double>(other.z));
    w = static_cast<int>(oneMinusT * static_cast<double>(w) + t * static_cast<double>(other.w));
}
// Non-mutating interpolation: copies this vector and runs LerpSelf on the copy.
template<>
Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
{
    Vector4d blended(*this);
    blended.LerpSelf(other, t);
    return blended;
}
// Non-mutating interpolation for int vectors.
// Both operands are converted with ToDouble() first, so the result is a double vector.
template<>
Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
{
    Vector4d blended(this->ToDouble());
    const Vector4d target = other.ToDouble();
    blended.LerpSelf(target, t);
    return blended;
}
// Component-wise sum of two double vectors.
template<>
Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Add all four lanes at once and spill them to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x + other.x, y + other.y, z + other.z, w + other.w);
#endif
}
// Generic component-wise sum.
template<typename T>
Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
{
    const T sumX = x + other.x;
    const T sumY = y + other.y;
    const T sumZ = z + other.z;
    const T sumW = w + other.w;
    return Vector4<T>(sumX, sumY, sumZ, sumW);
}
// Adds `other` to this double vector in place, component by component.
template<>
void Vector4<double>::operator+=(const Vector4<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Packed add, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_add_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x += other.x;
    y += other.y;
    z += other.z;
    w += other.w;
#endif
}
// Generic in-place component-wise addition.
template<typename T>
void Vector4<T>::operator+=(const Vector4<T>& other)
{
    this->x += other.x;
    this->y += other.y;
    this->z += other.z;
    this->w += other.w;
}
// Component-wise difference of two double vectors (this - other).
template<>
Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Subtract all four lanes at once and spill them to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x - other.x, y - other.y, z - other.z, w - other.w);
#endif
}
// Generic component-wise difference (this - other).
template<typename T>
Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
{
    const T diffX = x - other.x;
    const T diffY = y - other.y;
    const T diffZ = z - other.z;
    const T diffW = w - other.w;
    return Vector4<T>(diffX, diffY, diffZ, diffW);
}
// Subtracts `other` from this double vector in place, component by component.
template<>
void Vector4<double>::operator-=(const Vector4<double>& other)
{
#ifndef _EULE_NO_INTRINSICS_
    // Pack both operands into 256-bit registers (lane 0 = x ... lane 3 = w)
    const __m256d lhs = _mm256_set_pd(w, z, y, x);
    const __m256d rhs = _mm256_set_pd(other.w, other.z, other.y, other.x);

    // Packed subtract, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_sub_pd(lhs, rhs));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x -= other.x;
    y -= other.y;
    z -= other.z;
    w -= other.w;
#endif
}
// Generic in-place component-wise subtraction.
template<typename T>
void Vector4<T>::operator-=(const Vector4<T>& other)
{
    this->x -= other.x;
    this->y -= other.y;
    this->z -= other.z;
    this->w -= other.w;
}
// Returns this double vector with every component multiplied by `scale`.
template<>
Vector4<double> Vector4<double>::operator*(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the factor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Packed multiply, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x * scale, y * scale, z * scale, w * scale);
#endif
}
// Generic scalar multiplication: every component times `scale`.
template<typename T>
Vector4<T> Vector4<T>::operator*(const T scale) const
{
    const T scaledX = x * scale;
    const T scaledY = y * scale;
    const T scaledZ = z * scale;
    const T scaledW = w * scale;
    return Vector4<T>(scaledX, scaledY, scaledZ, scaledW);
}
// Multiplies every component of this double vector by `scale`, in place.
template<>
void Vector4<double>::operator*=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the factor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d factor = _mm256_set1_pd(scale);

    // Packed multiply, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x *= scale;
    y *= scale;
    z *= scale;
    w *= scale;
#endif
}
// Generic in-place scalar multiplication.
template<typename T>
void Vector4<T>::operator*=(const T scale)
{
    this->x *= scale;
    this->y *= scale;
    this->z *= scale;
    this->w *= scale;
}
// Returns this double vector with every component divided by `scale`.
template<>
Vector4<double> Vector4<double>::operator/(const double scale) const
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the divisor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Packed division, then spill the four lanes to memory
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));

    return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
#else
    return Vector4<double>(x / scale, y / scale, z / scale, w / scale);
#endif
}
// Generic scalar division: every component divided by `scale`.
// No check for scale == 0 is performed here.
template<typename T>
Vector4<T> Vector4<T>::operator/(const T scale) const
{
    const T quotX = x / scale;
    const T quotY = y / scale;
    const T quotZ = z / scale;
    const T quotW = w / scale;
    return Vector4<T>(quotX, quotY, quotZ, quotW);
}
// Divides every component of this double vector by `scale`, in place.
template<>
void Vector4<double>::operator/=(const double scale)
{
#ifndef _EULE_NO_INTRINSICS_
    // Components in lanes 0..3 = x..w, the divisor broadcast to every lane
    const __m256d components = _mm256_set_pd(w, z, y, x);
    const __m256d divisor = _mm256_set1_pd(scale);

    // Packed division, then write the lanes back into the components
    double lanes[4];
    _mm256_storeu_pd(lanes, _mm256_div_pd(components, divisor));
    x = lanes[0];
    y = lanes[1];
    z = lanes[2];
    w = lanes[3];
#else
    x /= scale;
    y /= scale;
    z /= scale;
    w /= scale;
#endif
}
// Generic in-place scalar division. No check for scale == 0 is performed here.
template<typename T>
void Vector4<T>::operator/=(const T scale)
{
    this->x /= scale;
    this->y /= scale;
    this->z /= scale;
    this->w /= scale;
}
// Exact component-wise equality (no epsilon tolerance is applied).
template<typename T>
bool Vector4<T>::operator==(const Vector4<T>& other) const
{
    // Bail out on the first component that differs
    if (!(x == other.x))
        return false;
    if (!(y == other.y))
        return false;
    if (!(z == other.z))
        return false;

    return w == other.w;
}
// Transforms this double vector by a 4x4 matrix and returns the result.
// Output component i is mat[i][0]*x + mat[i][1]*y + mat[i][2]*z + mat[i][3]*w.
template<>
Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
{
    Vector4<double> transformed;

    transformed.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
    transformed.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
    transformed.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
    transformed.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);

    return transformed;
}
// Transforms this int vector by a 4x4 matrix.
// The products are accumulated in a double vector and cast back to int at the end.
template<>
Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
{
    Vector4<double> transformed;

    transformed.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
    transformed.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
    transformed.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
    transformed.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);

    const int ix = (int)transformed.x;
    const int iy = (int)transformed.y;
    const int iz = (int)transformed.z;
    const int iw = (int)transformed.w;
    return Vector4<int>(ix, iy, iz, iw);
}
// Transforms this double vector by a 4x4 matrix, in place.
template<>
void Vector4<double>::operator*=(const Matrix4x4& mat)
{
    // Snapshot of the current components: every output row must read the ORIGINAL values
    const Vector4<double> before = *this;

    // Open question from the original author: should the indices be swapped
    // (mat[j][i] instead of mat[i][j])? Check this first if results look transposed.
    x = (mat[0][0] * before.x) + (mat[0][1] * before.y) + (mat[0][2] * before.z) + (mat[0][3] * before.w);
    y = (mat[1][0] * before.x) + (mat[1][1] * before.y) + (mat[1][2] * before.z) + (mat[1][3] * before.w);
    z = (mat[2][0] * before.x) + (mat[2][1] * before.y) + (mat[2][2] * before.z) + (mat[2][3] * before.w);
    w = (mat[3][0] * before.x) + (mat[3][1] * before.y) + (mat[3][2] * before.z) + (mat[3][3] * before.w);
}
// Unary minus: returns a new vector with all four components negated.
template<typename T>
Vector4<T> Vector4<T>::operator-() const
{
    const auto negatedX = -x;
    const auto negatedY = -y;
    const auto negatedZ = -z;
    const auto negatedW = -w;
    return Vector4<T>(negatedX, negatedY, negatedZ, negatedW);
}
// Copy assignment: copies all four components from `other`. Returns nothing.
template<typename T>
void Vector4<T>::operator=(const Vector4<T>& other)
{
    this->x = other.x;
    this->y = other.y;
    this->z = other.z;
    this->w = other.w;
}
// Move assignment: takes over all four components of `other`.
template<typename T>
void Vector4<T>::operator=(Vector4<T>&& other) noexcept
{
    // std::move is kept so that a non-trivial T would be moved instead of copied
    this->x = std::move(other.x);
    this->y = std::move(other.y);
    this->z = std::move(other.z);
    this->w = std::move(other.w);
}
// Transforms this int vector by a 4x4 matrix, in place.
// Each row is evaluated against a double copy of the components and cast back to int.
template<>
void Vector4<int>::operator*=(const Matrix4x4& mat)
{
    // Double-precision snapshot: every output row must read the ORIGINAL values
    const Vector4<double> before(x, y, z, w);

    // Open question from the original author: should the indices be swapped
    // (mat[j][i] instead of mat[i][j])? Check this first if results look transposed.
    x = (int)((mat[0][0] * before.x) + (mat[0][1] * before.y) + (mat[0][2] * before.z) + (mat[0][3] * before.w));
    y = (int)((mat[1][0] * before.x) + (mat[1][1] * before.y) + (mat[1][2] * before.z) + (mat[1][3] * before.w));
    z = (int)((mat[2][0] * before.x) + (mat[2][1] * before.y) + (mat[2][2] * before.z) + (mat[2][3] * before.w));
    w = (int)((mat[3][0] * before.x) + (mat[3][1] * before.y) + (mat[3][2] * before.z) + (mat[3][3] * before.w));
}
// Inequality, defined as the negation of operator==.
template<typename T>
bool Vector4<T>::operator!=(const Vector4<T>& other) const
{
    const bool isEqual = this->operator==(other);
    return !isEqual;
}
template class Vector4<int>;
template class Vector4<double>;
// Some handy predefines
template <typename T>
const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::forward(1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::backward(-1, 0, 0, 0);
template <typename T>
const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
template <typename T>
const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
template <typename T>
const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
template <typename T>
const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
}