Fix build warnings on macos
This commit is contained in:
parent
c86380f8ff
commit
fa825e9167
@ -2,6 +2,7 @@
|
||||
#include "Constants.h"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <cmath>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
711
Eule/Vector2.cpp
711
Eule/Vector2.cpp
@ -1,720 +1,15 @@
|
||||
#include "Vector2.h"
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
using namespace Eule;
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a pair of two ints, not to serve as a vector in the vector-calculus sense.
|
||||
*/
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::DotProduct(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components into registers
|
||||
__m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x);
|
||||
__m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x);
|
||||
|
||||
// Define bitmask, and execute computation
|
||||
const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot
|
||||
__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
|
||||
|
||||
// Retrieve result, and return it
|
||||
float result[8];
|
||||
_mm256_storeu_ps(result, __dot);
|
||||
|
||||
return result[0];
|
||||
|
||||
#else
|
||||
return (x * other.x) +
|
||||
(y * other.y);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::DotProduct(const Vector2<int>& other) const
|
||||
{
|
||||
int iDot = (x * other.x) +
|
||||
(y * other.y);
|
||||
|
||||
return (double)iDot;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::CrossProduct(const Vector2<double>& other) const
|
||||
{
|
||||
return (x * other.y) -
|
||||
(y * other.x);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::CrossProduct(const Vector2<int>& other) const
|
||||
{
|
||||
int iCross = (x * other.y) -
|
||||
(y * other.x);
|
||||
|
||||
return (double)iCross;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::SqrMagnitude() const
|
||||
{
|
||||
// x.DotProduct(x) == x.SqrMagnitude()
|
||||
return DotProduct(*this);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
double Vector2<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector2<double>(
|
||||
result[0],
|
||||
result[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
|
||||
{
|
||||
return Vector2<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector2<double> Vector2<T>::Normalize() const
|
||||
{
|
||||
Vector2<double> norm(x, y);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector2d
|
||||
template<>
|
||||
void Vector2<double>::NormalizeSelf()
|
||||
{
|
||||
double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector2<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
|
||||
#else
|
||||
|
||||
x = it * x + t * other.x;
|
||||
y = it * y + t * other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
|
||||
{
|
||||
Vector2d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
|
||||
{
|
||||
Vector2d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
T& Vector2<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector2<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Math::Similar(y, other.y, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<int> Vector2<T>::ToInt() const
|
||||
{
|
||||
return Vector2<int>((int)x, (int)y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<double> Vector2<T>::ToDouble() const
|
||||
{
|
||||
return Vector2<double>((double)x, (double)y);
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector2<double>(
|
||||
sum[0],
|
||||
sum[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x + other.x,
|
||||
y + other.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x + other.x,
|
||||
y + other.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator+=(const Vector2<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator+=(const Vector2<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector2<double>(
|
||||
diff[0],
|
||||
diff[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x - other.x,
|
||||
y - other.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x - other.x,
|
||||
y - other.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator-=(const Vector2<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator-=(const Vector2<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector2<double>(
|
||||
prod[0],
|
||||
prod[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x * scale,
|
||||
y * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x * scale,
|
||||
y * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector2<double>(
|
||||
prod[0],
|
||||
prod[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x / scale,
|
||||
y / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x / scale,
|
||||
y / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator=(const Vector2<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator=(Vector2<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::operator==(const Vector2<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::operator!=(const Vector2<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator-() const
|
||||
{
|
||||
return Vector2<T>(
|
||||
-x,
|
||||
-y
|
||||
);
|
||||
}
|
||||
|
||||
// Don't want these includes above the other stuff
|
||||
#include "Vector3.h"
|
||||
#include "Vector4.h"
|
||||
|
||||
template<typename T>
|
||||
Vector2<T>::operator Vector3<T>() const
|
||||
Eule::Vector2<T>::operator Eule::Vector3<T>() const
|
||||
{
|
||||
return Vector3<T>(x, y, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T>::operator Vector4<T>() const
|
||||
Eule::Vector2<T>::operator Eule::Vector4<T>() const
|
||||
{
|
||||
return Vector4<T>(x, y, 0, 0);
|
||||
}
|
||||
|
||||
template class Vector2<int>;
|
||||
template class Vector2<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::up(0, 1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::down(0, -1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::right(1, 0);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::left(-1, 0);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::one(1, 1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::zero(0, 0);
|
||||
|
702
Eule/Vector2.h
702
Eule/Vector2.h
@ -1,6 +1,22 @@
|
||||
#pragma once
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a pair of two ints, not to serve as a vector in the vector-calculus sense.
|
||||
*/
|
||||
|
||||
namespace Eule
|
||||
{
|
||||
@ -100,4 +116,690 @@ namespace Eule
|
||||
|
||||
typedef Vector2<int> Vector2i;
|
||||
typedef Vector2<double> Vector2d;
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::DotProduct(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components into registers
|
||||
__m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x);
|
||||
__m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x);
|
||||
|
||||
// Define bitmask, and execute computation
|
||||
const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot
|
||||
__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
|
||||
|
||||
// Retrieve result, and return it
|
||||
float result[8];
|
||||
_mm256_storeu_ps(result, __dot);
|
||||
|
||||
return result[0];
|
||||
|
||||
#else
|
||||
return (x * other.x) +
|
||||
(y * other.y);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::DotProduct(const Vector2<int>& other) const
|
||||
{
|
||||
int iDot = (x * other.x) +
|
||||
(y * other.y);
|
||||
|
||||
return (double)iDot;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::CrossProduct(const Vector2<double>& other) const
|
||||
{
|
||||
return (x * other.y) -
|
||||
(y * other.x);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::CrossProduct(const Vector2<int>& other) const
|
||||
{
|
||||
int iCross = (x * other.y) -
|
||||
(y * other.x);
|
||||
|
||||
return (double)iCross;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector2<double>::SqrMagnitude() const
|
||||
{
|
||||
// x.DotProduct(x) == x.SqrMagnitude()
|
||||
return DotProduct(*this);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector2<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
double Vector2<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector2<double>(
|
||||
result[0],
|
||||
result[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
|
||||
{
|
||||
return Vector2<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector2<double> Vector2<T>::Normalize() const
|
||||
{
|
||||
Vector2<double> norm(x, y);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector2d
|
||||
template<>
|
||||
void Vector2<double>::NormalizeSelf()
|
||||
{
|
||||
double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector2<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
|
||||
#else
|
||||
|
||||
x = it * x + t * other.x;
|
||||
y = it * y + t * other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
|
||||
{
|
||||
Vector2d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
|
||||
{
|
||||
Vector2d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
T& Vector2<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector2<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Eule::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Eule::Math::Similar(y, other.y, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<int> Vector2<T>::ToInt() const
|
||||
{
|
||||
return Vector2<int>((int)x, (int)y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<double> Vector2<T>::ToDouble() const
|
||||
{
|
||||
return Vector2<double>((double)x, (double)y);
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector2<double>(
|
||||
sum[0],
|
||||
sum[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x + other.x,
|
||||
y + other.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x + other.x,
|
||||
y + other.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator+=(const Vector2<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator+=(const Vector2<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector2<double>(
|
||||
diff[0],
|
||||
diff[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x - other.x,
|
||||
y - other.y
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x - other.x,
|
||||
y - other.y
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator-=(const Vector2<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator-=(const Vector2<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector2<double>(
|
||||
prod[0],
|
||||
prod[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x * scale,
|
||||
y * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x * scale,
|
||||
y * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector2<double> Vector2<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector2<double>(
|
||||
prod[0],
|
||||
prod[1]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector2<double>(
|
||||
x / scale,
|
||||
y / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector2<T>(
|
||||
x / scale,
|
||||
y / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector2<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator=(const Vector2<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector2<T>::operator=(Vector2<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::operator==(const Vector2<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector2<T>::operator!=(const Vector2<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector2<T> Vector2<T>::operator-() const
|
||||
{
|
||||
return Vector2<T>(
|
||||
-x,
|
||||
-y
|
||||
);
|
||||
}
|
||||
|
||||
template class Vector2<int>;
|
||||
template class Vector2<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::up(0, 1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::down(0, -1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::right(1, 0);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::left(-1, 0);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::one(1, 1);
|
||||
template <typename T>
|
||||
const Vector2<double> Vector2<T>::zero(0, 0);
|
||||
}
|
||||
|
917
Eule/Vector3.cpp
917
Eule/Vector3.cpp
@ -1,928 +1,19 @@
|
||||
#include "Vector3.h"
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
using namespace Eule;
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a triple of three ints, not so much as a vector in the sense of vector calculus.
|
||||
*/
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector3<double>::DotProduct(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components into registers
|
||||
__m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x);
|
||||
__m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x);
|
||||
|
||||
// Define bitmask, and execute computation
|
||||
const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot
|
||||
__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
|
||||
|
||||
// Retrieve result, and return it
|
||||
float result[8];
|
||||
_mm256_storeu_ps(result, __dot);
|
||||
|
||||
return result[0];
|
||||
|
||||
#else
|
||||
return (x * other.x) +
|
||||
(y * other.y) +
|
||||
(z * other.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector3<int>::DotProduct(const Vector3<int>& other) const
|
||||
{
|
||||
int iDot = (x * other.x) + (y * other.y) + (z * other.z);
|
||||
return (double)iDot;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::CrossProduct(const Vector3<double>& other) const
|
||||
{
|
||||
Vector3<double> cp;
|
||||
cp.x = (y * other.z) - (z * other.y);
|
||||
cp.y = (z * other.x) - (x * other.z);
|
||||
cp.z = (x * other.y) - (y * other.x);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector3<double> Vector3<int>::CrossProduct(const Vector3<int>& other) const
|
||||
{
|
||||
Vector3<double> cp;
|
||||
cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y);
|
||||
cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z);
|
||||
cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector3<double>::SqrMagnitude() const
|
||||
{
|
||||
// x.DotProduct(x) == x.SqrMagnitude()
|
||||
return DotProduct(*this);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector3<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y + z*z;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
double Vector3<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::VectorScale(const Vector3<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector3<double>(
|
||||
result[0],
|
||||
result[1],
|
||||
result[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<int> Vector3<int>::VectorScale(const Vector3<int>& scalar) const
|
||||
{
|
||||
return Vector3<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector3<double> Vector3<T>::Normalize() const
|
||||
{
|
||||
Vector3<double> norm(x, y, z);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector3d
|
||||
template<>
|
||||
void Vector3<double>::NormalizeSelf()
|
||||
{
|
||||
const double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
z /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector3<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::Similar(const Vector3<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Math::Similar(y, other.y, epsilon)) &&
|
||||
(::Math::Similar(z, other.z, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<int> Vector3<T>::ToInt() const
|
||||
{
|
||||
return Vector3<int>((int)x, (int)y, (int)z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<double> Vector3<T>::ToDouble() const
|
||||
{
|
||||
return Vector3<double>((double)x, (double)y, (double)z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T& Vector3<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector3<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector3<double>::LerpSelf(const Vector3<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
|
||||
#else
|
||||
|
||||
x = it*x + t*other.x;
|
||||
y = it*y + t*other.y;
|
||||
z = it*z + t*other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector3<int>::LerpSelf(const Vector3<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
z = (int)(it * (double)z + t * (double)other.z);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::Lerp(const Vector3<double>& other, double t) const
|
||||
{
|
||||
Vector3d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<int>::Lerp(const Vector3<int>& other, double t) const
|
||||
{
|
||||
Vector3d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator+(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector3<double>(
|
||||
sum[0],
|
||||
sum[1],
|
||||
sum[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator+(const Vector3<T>& other) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator+=(const Vector3<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator+=(const Vector3<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator-(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector3<double>(
|
||||
diff[0],
|
||||
diff[1],
|
||||
diff[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator-(const Vector3<T>& other) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator-=(const Vector3<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
z = diff[2];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator-=(const Vector3<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector3<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector3<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector3<double> newVec;
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
// Store x, y, and z values
|
||||
__m256d __vecx = _mm256_set1_pd(x);
|
||||
__m256d __vecy = _mm256_set1_pd(y);
|
||||
__m256d __vecz = _mm256_set1_pd(z);
|
||||
|
||||
// Store matrix values
|
||||
__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
|
||||
__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
|
||||
__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
|
||||
|
||||
// Multiply x, y, z and matrix values
|
||||
__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
|
||||
__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
|
||||
__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
|
||||
|
||||
// Sum up the products
|
||||
__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
|
||||
|
||||
// Store translation values
|
||||
__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
|
||||
|
||||
// Add the translation values
|
||||
__m256d __final = _mm256_add_pd(__sum, __translation);
|
||||
|
||||
double dfinal[4];
|
||||
|
||||
_mm256_storeu_pd(dfinal, __final);
|
||||
|
||||
newVec.x = dfinal[3];
|
||||
newVec.y = dfinal[2];
|
||||
newVec.z = dfinal[1];
|
||||
|
||||
#else
|
||||
// Rotation, Scaling
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
|
||||
|
||||
// Translation
|
||||
newVec.x += mat[0][3];
|
||||
newVec.y += mat[1][3];
|
||||
newVec.z += mat[2][3];
|
||||
#endif
|
||||
|
||||
return newVec;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector3<int> Vector3<int>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector3<double> newVec;
|
||||
|
||||
// Rotation, Scaling
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
|
||||
|
||||
// Translation
|
||||
newVec.x += mat[0][3];
|
||||
newVec.y += mat[1][3];
|
||||
newVec.z += mat[2][3];
|
||||
|
||||
return Vector3<int>(
|
||||
(int)newVec.x,
|
||||
(int)newVec.y,
|
||||
(int)newVec.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector3<double>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
// Store x, y, and z values
|
||||
__m256d __vecx = _mm256_set1_pd(x);
|
||||
__m256d __vecy = _mm256_set1_pd(y);
|
||||
__m256d __vecz = _mm256_set1_pd(z);
|
||||
|
||||
// Store matrix values
|
||||
__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
|
||||
__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
|
||||
__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
|
||||
|
||||
// Multiply x, y, z and matrix values
|
||||
__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
|
||||
__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
|
||||
__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
|
||||
|
||||
// Sum up the products
|
||||
__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
|
||||
|
||||
// Store translation values
|
||||
__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
|
||||
|
||||
// Add the translation values
|
||||
__m256d __final = _mm256_add_pd(__sum, __translation);
|
||||
|
||||
double dfinal[4];
|
||||
|
||||
_mm256_storeu_pd(dfinal, __final);
|
||||
|
||||
x = dfinal[3];
|
||||
y = dfinal[2];
|
||||
z = dfinal[1];
|
||||
|
||||
#else
|
||||
Vector3<double> buffer = *this;
|
||||
x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z);
|
||||
y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z);
|
||||
z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z);
|
||||
|
||||
// Translation
|
||||
x += mat[0][3];
|
||||
y += mat[1][3];
|
||||
z += mat[2][3];
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator-() const
|
||||
{
|
||||
return Vector3<T>(
|
||||
-x,
|
||||
-y,
|
||||
-z
|
||||
);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator=(const Vector3<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
z = other.z;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator=(Vector3<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
z = std::move(other.z);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector3<int>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector3<double> buffer(x, y, z);
|
||||
|
||||
x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z));
|
||||
y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z));
|
||||
z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z));
|
||||
|
||||
// Translation
|
||||
x += (int)mat[0][3];
|
||||
y += (int)mat[1][3];
|
||||
z += (int)mat[2][3];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::operator==(const Vector3<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y) &&
|
||||
(z == other.z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::operator!=(const Vector3<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
|
||||
#include "Vector3.h"
|
||||
#include "Vector2.h"
|
||||
#include "Vector4.h"
|
||||
|
||||
template<typename T>
|
||||
Vector3<T>::operator Vector2<T>() const
|
||||
Eule::Vector3<T>::operator Eule::Vector2<T>() const
|
||||
{
|
||||
return Vector2<T>(x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T>::operator Vector4<T>() const
|
||||
Eule::Vector3<T>::operator Eule::Vector4<T>() const
|
||||
{
|
||||
return Vector4<T>(x, y, z, 0);
|
||||
}
|
||||
|
||||
template class Vector3<int>;
|
||||
template class Vector3<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::up(0, 1, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::down(0, -1, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::right(1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::left(-1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::forward(0, 0, 1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::backward(0, 0, -1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::one(1, 1, 1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::zero(0, 0, 0);
|
||||
|
912
Eule/Vector3.h
912
Eule/Vector3.h
@ -5,6 +5,23 @@
|
||||
#include <sstream>
|
||||
#include "Matrix4x4.h"
|
||||
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a triple of three ints, not so much as a vector in the sense of vector calculus.
|
||||
*/
|
||||
|
||||
namespace Eule
|
||||
{
|
||||
template <typename T> class Vector2;
|
||||
@ -108,4 +125,899 @@ namespace Eule
|
||||
|
||||
typedef Vector3<int> Vector3i;
|
||||
typedef Vector3<double> Vector3d;
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector3<double>::DotProduct(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components into registers
|
||||
__m256 __vector_self = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x);
|
||||
__m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x);
|
||||
|
||||
// Define bitmask, and execute computation
|
||||
const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot
|
||||
__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
|
||||
|
||||
// Retrieve result, and return it
|
||||
float result[8];
|
||||
_mm256_storeu_ps(result, __dot);
|
||||
|
||||
return result[0];
|
||||
|
||||
#else
|
||||
return (x * other.x) +
|
||||
(y * other.y) +
|
||||
(z * other.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector3<int>::DotProduct(const Vector3<int>& other) const
|
||||
{
|
||||
int iDot = (x * other.x) + (y * other.y) + (z * other.z);
|
||||
return (double)iDot;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::CrossProduct(const Vector3<double>& other) const
|
||||
{
|
||||
Vector3<double> cp;
|
||||
cp.x = (y * other.z) - (z * other.y);
|
||||
cp.y = (z * other.x) - (x * other.z);
|
||||
cp.z = (x * other.y) - (y * other.x);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector3<double> Vector3<int>::CrossProduct(const Vector3<int>& other) const
|
||||
{
|
||||
Vector3<double> cp;
|
||||
cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y);
|
||||
cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z);
|
||||
cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector3<double>::SqrMagnitude() const
|
||||
{
|
||||
// x.DotProduct(x) == x.SqrMagnitude()
|
||||
return DotProduct(*this);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector3<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y + z*z;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
double Vector3<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::VectorScale(const Vector3<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector3<double>(
|
||||
result[0],
|
||||
result[1],
|
||||
result[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<int> Vector3<int>::VectorScale(const Vector3<int>& scalar) const
|
||||
{
|
||||
return Vector3<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector3<double> Vector3<T>::Normalize() const
|
||||
{
|
||||
Vector3<double> norm(x, y, z);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector3d
|
||||
template<>
|
||||
void Vector3<double>::NormalizeSelf()
|
||||
{
|
||||
const double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
z /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector3<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::Similar(const Vector3<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Eule::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Eule::Math::Similar(y, other.y, epsilon)) &&
|
||||
(::Eule::Math::Similar(z, other.z, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<int> Vector3<T>::ToInt() const
|
||||
{
|
||||
return Vector3<int>((int)x, (int)y, (int)z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<double> Vector3<T>::ToDouble() const
|
||||
{
|
||||
return Vector3<double>((double)x, (double)y, (double)z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T& Vector3<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector3<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector3<double>::LerpSelf(const Vector3<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
|
||||
#else
|
||||
|
||||
x = it*x + t*other.x;
|
||||
y = it*y + t*other.y;
|
||||
z = it*z + t*other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector3<int>::LerpSelf(const Vector3<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
z = (int)(it * (double)z + t * (double)other.z);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::Lerp(const Vector3<double>& other, double t) const
|
||||
{
|
||||
Vector3d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<int>::Lerp(const Vector3<int>& other, double t) const
|
||||
{
|
||||
Vector3d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator+(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector3<double>(
|
||||
sum[0],
|
||||
sum[1],
|
||||
sum[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator+(const Vector3<T>& other) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator+=(const Vector3<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator+=(const Vector3<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator-(const Vector3<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector3<double>(
|
||||
diff[0],
|
||||
diff[1],
|
||||
diff[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator-(const Vector3<T>& other) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator-=(const Vector3<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
z = diff[2];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator-=(const Vector3<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector3<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector3<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector3<double>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector3<T>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector3<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(0, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector3<double> Vector3<double>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector3<double> newVec;
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
// Store x, y, and z values
|
||||
__m256d __vecx = _mm256_set1_pd(x);
|
||||
__m256d __vecy = _mm256_set1_pd(y);
|
||||
__m256d __vecz = _mm256_set1_pd(z);
|
||||
|
||||
// Store matrix values
|
||||
__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
|
||||
__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
|
||||
__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
|
||||
|
||||
// Multiply x, y, z and matrix values
|
||||
__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
|
||||
__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
|
||||
__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
|
||||
|
||||
// Sum up the products
|
||||
__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
|
||||
|
||||
// Store translation values
|
||||
__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
|
||||
|
||||
// Add the translation values
|
||||
__m256d __final = _mm256_add_pd(__sum, __translation);
|
||||
|
||||
double dfinal[4];
|
||||
|
||||
_mm256_storeu_pd(dfinal, __final);
|
||||
|
||||
newVec.x = dfinal[3];
|
||||
newVec.y = dfinal[2];
|
||||
newVec.z = dfinal[1];
|
||||
|
||||
#else
|
||||
// Rotation, Scaling
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
|
||||
|
||||
// Translation
|
||||
newVec.x += mat[0][3];
|
||||
newVec.y += mat[1][3];
|
||||
newVec.z += mat[2][3];
|
||||
#endif
|
||||
|
||||
return newVec;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector3<int> Vector3<int>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector3<double> newVec;
|
||||
|
||||
// Rotation, Scaling
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
|
||||
|
||||
// Translation
|
||||
newVec.x += mat[0][3];
|
||||
newVec.y += mat[1][3];
|
||||
newVec.z += mat[2][3];
|
||||
|
||||
return Vector3<int>(
|
||||
(int)newVec.x,
|
||||
(int)newVec.y,
|
||||
(int)newVec.z
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector3<double>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
// Store x, y, and z values
|
||||
__m256d __vecx = _mm256_set1_pd(x);
|
||||
__m256d __vecy = _mm256_set1_pd(y);
|
||||
__m256d __vecz = _mm256_set1_pd(z);
|
||||
|
||||
// Store matrix values
|
||||
__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
|
||||
__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
|
||||
__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
|
||||
|
||||
// Multiply x, y, z and matrix values
|
||||
__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
|
||||
__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
|
||||
__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
|
||||
|
||||
// Sum up the products
|
||||
__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
|
||||
|
||||
// Store translation values
|
||||
__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
|
||||
|
||||
// Add the translation values
|
||||
__m256d __final = _mm256_add_pd(__sum, __translation);
|
||||
|
||||
double dfinal[4];
|
||||
|
||||
_mm256_storeu_pd(dfinal, __final);
|
||||
|
||||
x = dfinal[3];
|
||||
y = dfinal[2];
|
||||
z = dfinal[1];
|
||||
|
||||
#else
|
||||
Vector3<double> buffer = *this;
|
||||
x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z);
|
||||
y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z);
|
||||
z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z);
|
||||
|
||||
// Translation
|
||||
x += mat[0][3];
|
||||
y += mat[1][3];
|
||||
z += mat[2][3];
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector3<T> Vector3<T>::operator-() const
|
||||
{
|
||||
return Vector3<T>(
|
||||
-x,
|
||||
-y,
|
||||
-z
|
||||
);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator=(const Vector3<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
z = other.z;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector3<T>::operator=(Vector3<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
z = std::move(other.z);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector3<int>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector3<double> buffer(x, y, z);
|
||||
|
||||
x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z));
|
||||
y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z));
|
||||
z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z));
|
||||
|
||||
// Translation
|
||||
x += (int)mat[0][3];
|
||||
y += (int)mat[1][3];
|
||||
z += (int)mat[2][3];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::operator==(const Vector3<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y) &&
|
||||
(z == other.z);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector3<T>::operator!=(const Vector3<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
template class Vector3<int>;
|
||||
template class Vector3<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::up(0, 1, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::down(0, -1, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::right(1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::left(-1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::forward(0, 0, 1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::backward(0, 0, -1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::one(1, 1, 1);
|
||||
template <typename T>
|
||||
const Vector3<double> Vector3<T>::zero(0, 0, 0);
|
||||
|
||||
}
|
||||
|
822
Eule/Vector4.cpp
822
Eule/Vector4.cpp
@ -1,831 +1,15 @@
|
||||
#include "Vector4.h"
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
using namespace Eule;
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a tuple of four ints, not so much as a vector in terms of vector calculus.
|
||||
*/
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector4<double>::SqrMagnitude() const
|
||||
{
|
||||
return (x * x) +
|
||||
(y * y) +
|
||||
(z * z) +
|
||||
(w * w);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector4<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y + z*z + w*w;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
double Vector4<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector4<double>(
|
||||
result[0],
|
||||
result[1],
|
||||
result[2],
|
||||
result[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z,
|
||||
w * scalar.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
|
||||
{
|
||||
return Vector4<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z,
|
||||
w * scalar.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector4<double> Vector4<T>::Normalize() const
|
||||
{
|
||||
Vector4<double> norm(x, y, z, w);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector4d
|
||||
template<>
|
||||
void Vector4<double>::NormalizeSelf()
|
||||
{
|
||||
double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
w = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
z /= length;
|
||||
w /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector4<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
w = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Math::Similar(y, other.y, epsilon)) &&
|
||||
(::Math::Similar(z, other.z, epsilon)) &&
|
||||
(::Math::Similar(w, other.w, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<int> Vector4<T>::ToInt() const
|
||||
{
|
||||
return Vector4<int>((int)x, (int)y, (int)z, (int)w);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<double> Vector4<T>::ToDouble() const
|
||||
{
|
||||
return Vector4<double>((double)x, (double)y, (double)z, (double)w);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T& Vector4<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
case 3:
|
||||
return w;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector4<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
case 3:
|
||||
return w;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
w = sum[3];
|
||||
|
||||
#else
|
||||
|
||||
x = it * x + t * other.x;
|
||||
y = it * y + t * other.y;
|
||||
z = it * z + t * other.z;
|
||||
w = it * w + t * other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t;
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
z = (int)(it * (double)z + t * (double)other.z);
|
||||
w = (int)(it * (double)w + t * (double)other.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
|
||||
{
|
||||
Vector4d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
|
||||
{
|
||||
Vector4d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector4<double>(
|
||||
sum[0],
|
||||
sum[1],
|
||||
sum[2],
|
||||
sum[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z,
|
||||
w + other.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z,
|
||||
w + other.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator+=(const Vector4<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
w = sum[3];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator+=(const Vector4<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector4<double>(
|
||||
diff[0],
|
||||
diff[1],
|
||||
diff[2],
|
||||
diff[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z,
|
||||
w - other.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z,
|
||||
w - other.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator-=(const Vector4<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
z = diff[2];
|
||||
w = diff[3];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
w -= other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator-=(const Vector4<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
w -= other.w;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector4<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2],
|
||||
prod[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale,
|
||||
w * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale,
|
||||
w * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
w *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
w *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector4<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2],
|
||||
prod[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale,
|
||||
w / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale,
|
||||
w / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
w /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
w /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::operator==(const Vector4<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y) &&
|
||||
(z == other.z) &&
|
||||
(w == other.w);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector4<double> newVec;
|
||||
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
|
||||
newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
|
||||
|
||||
return newVec;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector4<double> newVec;
|
||||
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
|
||||
newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
|
||||
|
||||
return Vector4<int>(
|
||||
(int)newVec.x,
|
||||
(int)newVec.y,
|
||||
(int)newVec.z,
|
||||
(int)newVec.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector4<double>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector4<double> buffer = *this;
|
||||
|
||||
// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
|
||||
// idk right now. check that if something doesn't work
|
||||
x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w);
|
||||
y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w);
|
||||
z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w);
|
||||
w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator-() const
|
||||
{
|
||||
return Vector4<T>(
|
||||
-x,
|
||||
-y,
|
||||
-z,
|
||||
-w
|
||||
);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator=(const Vector4<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
z = other.z;
|
||||
w = other.w;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator=(Vector4<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
z = std::move(other.z);
|
||||
w = std::move(other.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector4<int>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector4<double> buffer(x, y, z, w);
|
||||
|
||||
// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
|
||||
// idk right now. check that if something doesn't work
|
||||
x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w));
|
||||
y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w));
|
||||
z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w));
|
||||
w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::operator!=(const Vector4<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
#include "Vector2.h"
|
||||
#include "Vector3.h"
|
||||
|
||||
template<typename T>
|
||||
Vector4<T>::operator Vector2<T>() const
|
||||
Eule::Vector4<T>::operator Eule::Vector2<T>() const
|
||||
{
|
||||
return Vector2<T>(x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T>::operator Vector3<T>() const
|
||||
Eule::Vector4<T>::operator Eule::Vector3<T>() const
|
||||
{
|
||||
return Vector3<T>(x, y, z);
|
||||
}
|
||||
|
||||
template class Vector4<int>;
|
||||
template class Vector4<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::forward(1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::backward(-1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
|
||||
|
815
Eule/Vector4.h
815
Eule/Vector4.h
@ -5,6 +5,22 @@
|
||||
#include <sstream>
|
||||
#include "Matrix4x4.h"
|
||||
|
||||
#include "Math.h"
|
||||
#include <iostream>
|
||||
|
||||
//#define _EULE_NO_INTRINSICS_
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
NOTE:
|
||||
Here you will find bad, unoptimized methods for T=int.
|
||||
This is because the compiler needs a method for each type in each instantiation of the template!
|
||||
I can't generalize the methods when heavily optimizing for doubles.
|
||||
These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
|
||||
The T=int instantiation only exists to store a tuple of four ints, not so much as a vector in the vector-calculus sense.
|
||||
*/
|
||||
namespace Eule
|
||||
{
|
||||
template <typename T> class Vector2;
|
||||
@ -105,4 +121,803 @@ namespace Eule
|
||||
|
||||
typedef Vector4<int> Vector4i;
|
||||
typedef Vector4<double> Vector4d;
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
double Vector4<double>::SqrMagnitude() const
|
||||
{
|
||||
return (x * x) +
|
||||
(y * y) +
|
||||
(z * z) +
|
||||
(w * w);
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
double Vector4<int>::SqrMagnitude() const
|
||||
{
|
||||
int iSqrMag = x*x + y*y + z*z + w*w;
|
||||
return (double)iSqrMag;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
double Vector4<T>::Magnitude() const
|
||||
{
|
||||
return sqrt(SqrMagnitude());
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vectors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);
|
||||
|
||||
// Multiply them
|
||||
__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
|
||||
|
||||
// Retrieve result
|
||||
double result[4];
|
||||
_mm256_storeu_pd(result, __product);
|
||||
|
||||
// Return value
|
||||
return Vector4<double>(
|
||||
result[0],
|
||||
result[1],
|
||||
result[2],
|
||||
result[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z,
|
||||
w * scalar.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
|
||||
{
|
||||
return Vector4<int>(
|
||||
x * scalar.x,
|
||||
y * scalar.y,
|
||||
z * scalar.z,
|
||||
w * scalar.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
Vector4<double> Vector4<T>::Normalize() const
|
||||
{
|
||||
Vector4<double> norm(x, y, z, w);
|
||||
norm.NormalizeSelf();
|
||||
|
||||
return norm;
|
||||
}
|
||||
|
||||
// Method to normalize a Vector4d
|
||||
template<>
|
||||
void Vector4<double>::NormalizeSelf()
|
||||
{
|
||||
double length = Magnitude();
|
||||
|
||||
// Prevent division by 0
|
||||
if (length == 0)
|
||||
{
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
w = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Load vector and length into registers
|
||||
__m256d __vec = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __len = _mm256_set1_pd(length);
|
||||
|
||||
// Divide
|
||||
__m256d __prod = _mm256_div_pd(__vec, __len);
|
||||
|
||||
// Extract and set values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x /= length;
|
||||
y /= length;
|
||||
z /= length;
|
||||
w /= length;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// You can't normalize an int vector, ffs!
|
||||
// But we need an implementation for T=int
|
||||
template<>
|
||||
void Vector4<int>::NormalizeSelf()
|
||||
{
|
||||
std::cerr << "Stop normalizing int-vectors!!" << std::endl;
|
||||
x = 0;
|
||||
y = 0;
|
||||
z = 0;
|
||||
w = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
|
||||
{
|
||||
return
|
||||
(::Eule::Math::Similar(x, other.x, epsilon)) &&
|
||||
(::Eule::Math::Similar(y, other.y, epsilon)) &&
|
||||
(::Eule::Math::Similar(z, other.z, epsilon)) &&
|
||||
(::Eule::Math::Similar(w, other.w, epsilon))
|
||||
;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<int> Vector4<T>::ToInt() const
|
||||
{
|
||||
return Vector4<int>((int)x, (int)y, (int)z, (int)w);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<double> Vector4<T>::ToDouble() const
|
||||
{
|
||||
return Vector4<double>((double)x, (double)y, (double)z, (double)w);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T& Vector4<T>::operator[](std::size_t idx)
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
case 3:
|
||||
return w;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& Vector4<T>::operator[](std::size_t idx) const
|
||||
{
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
return x;
|
||||
case 1:
|
||||
return y;
|
||||
case 2:
|
||||
return z;
|
||||
case 3:
|
||||
return w;
|
||||
default:
|
||||
throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t; // Inverse t
|
||||
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
__m256d __t = _mm256_set1_pd(t);
|
||||
__m256d __it = _mm256_set1_pd(it); // Inverse t
|
||||
|
||||
// Procedure:
|
||||
// (__vector_self * __it) + (__vector_other * __t)
|
||||
|
||||
__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
|
||||
|
||||
__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
|
||||
__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
|
||||
|
||||
// Retrieve result, and apply it
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
w = sum[3];
|
||||
|
||||
#else
|
||||
|
||||
x = it * x + t * other.x;
|
||||
y = it * y + t * other.y;
|
||||
z = it * z + t * other.z;
|
||||
w = it * w + t * other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
|
||||
{
|
||||
const double it = 1.0 - t;
|
||||
|
||||
x = (int)(it * (double)x + t * (double)other.x);
|
||||
y = (int)(it * (double)y + t * (double)other.y);
|
||||
z = (int)(it * (double)z + t * (double)other.z);
|
||||
w = (int)(it * (double)w + t * (double)other.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
|
||||
{
|
||||
Vector4d copy(*this);
|
||||
copy.LerpSelf(other, t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
|
||||
{
|
||||
Vector4d copy(this->ToDouble());
|
||||
copy.LerpSelf(other.ToDouble(), t);
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
return Vector4<double>(
|
||||
sum[0],
|
||||
sum[1],
|
||||
sum[2],
|
||||
sum[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z,
|
||||
w + other.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x + other.x,
|
||||
y + other.y,
|
||||
z + other.z,
|
||||
w + other.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator+=(const Vector4<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Add the components
|
||||
__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double sum[4];
|
||||
_mm256_storeu_pd(sum, __sum);
|
||||
|
||||
x = sum[0];
|
||||
y = sum[1];
|
||||
z = sum[2];
|
||||
w = sum[3];
|
||||
|
||||
#else
|
||||
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator+=(const Vector4<T>& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and return these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
return Vector4<double>(
|
||||
diff[0],
|
||||
diff[1],
|
||||
diff[2],
|
||||
diff[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z,
|
||||
w - other.w
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x - other.x,
|
||||
y - other.y,
|
||||
z - other.z,
|
||||
w - other.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator-=(const Vector4<double>& other)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
|
||||
|
||||
// Subtract the components
|
||||
__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double diff[4];
|
||||
_mm256_storeu_pd(diff, __diff);
|
||||
|
||||
x = diff[0];
|
||||
y = diff[1];
|
||||
z = diff[2];
|
||||
w = diff[3];
|
||||
|
||||
#else
|
||||
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
w -= other.w;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator-=(const Vector4<T>& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
w -= other.w;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator*(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector4<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2],
|
||||
prod[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale,
|
||||
w * scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator*(const T scale) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x * scale,
|
||||
y * scale,
|
||||
z * scale,
|
||||
w * scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator*=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Multiply the components
|
||||
__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
w *= scale;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator*=(const T scale)
|
||||
{
|
||||
x *= scale;
|
||||
y *= scale;
|
||||
z *= scale;
|
||||
w *= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator/(const double scale) const
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and return these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
return Vector4<double>(
|
||||
prod[0],
|
||||
prod[1],
|
||||
prod[2],
|
||||
prod[3]
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
return Vector4<double>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale,
|
||||
w / scale
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator/(const T scale) const
|
||||
{
|
||||
return Vector4<T>(
|
||||
x / scale,
|
||||
y / scale,
|
||||
z / scale,
|
||||
w / scale
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void Vector4<double>::operator/=(const double scale)
|
||||
{
|
||||
#ifndef _EULE_NO_INTRINSICS_
|
||||
|
||||
// Move vector components and factors into registers
|
||||
__m256d __vector_self = _mm256_set_pd(w, z, y, x);
|
||||
__m256d __scalar = _mm256_set1_pd(scale);
|
||||
|
||||
// Divide the components
|
||||
__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
|
||||
|
||||
// Retrieve and apply these values
|
||||
double prod[4];
|
||||
_mm256_storeu_pd(prod, __prod);
|
||||
|
||||
x = prod[0];
|
||||
y = prod[1];
|
||||
z = prod[2];
|
||||
w = prod[3];
|
||||
|
||||
#else
|
||||
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
w /= scale;
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator/=(const T scale)
|
||||
{
|
||||
x /= scale;
|
||||
y /= scale;
|
||||
z /= scale;
|
||||
w /= scale;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::operator==(const Vector4<T>& other) const
|
||||
{
|
||||
return
|
||||
(x == other.x) &&
|
||||
(y == other.y) &&
|
||||
(z == other.z) &&
|
||||
(w == other.w);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector4<double> newVec;
|
||||
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
|
||||
newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
|
||||
|
||||
return newVec;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
|
||||
{
|
||||
Vector4<double> newVec;
|
||||
|
||||
newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
|
||||
newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
|
||||
newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
|
||||
newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
|
||||
|
||||
return Vector4<int>(
|
||||
(int)newVec.x,
|
||||
(int)newVec.y,
|
||||
(int)newVec.z,
|
||||
(int)newVec.w
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Good, optimized chad version for doubles
|
||||
template<>
|
||||
void Vector4<double>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector4<double> buffer = *this;
|
||||
|
||||
// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
|
||||
// idk right now. check that if something doesn't work
|
||||
x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w);
|
||||
y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w);
|
||||
z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w);
|
||||
w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Vector4<T> Vector4<T>::operator-() const
|
||||
{
|
||||
return Vector4<T>(
|
||||
-x,
|
||||
-y,
|
||||
-z,
|
||||
-w
|
||||
);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator=(const Vector4<T>& other)
|
||||
{
|
||||
x = other.x;
|
||||
y = other.y;
|
||||
z = other.z;
|
||||
w = other.w;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Vector4<T>::operator=(Vector4<T>&& other) noexcept
|
||||
{
|
||||
x = std::move(other.x);
|
||||
y = std::move(other.y);
|
||||
z = std::move(other.z);
|
||||
w = std::move(other.w);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow, lame version for intcels
|
||||
template<>
|
||||
void Vector4<int>::operator*=(const Matrix4x4& mat)
|
||||
{
|
||||
Vector4<double> buffer(x, y, z, w);
|
||||
|
||||
// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
|
||||
// idk right now. check that if something doesn't work
|
||||
x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w));
|
||||
y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w));
|
||||
z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w));
|
||||
w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool Vector4<T>::operator!=(const Vector4<T>& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
template class Vector4<int>;
|
||||
template class Vector4<double>;
|
||||
|
||||
// Some handy predefines
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::forward(1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::backward(-1, 0, 0, 0);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
|
||||
template <typename T>
|
||||
const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
|
||||
|
||||
}
|
||||
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#define EULE_VERSION (0.1)
|
||||
#define EULE_VERSION (0.11)
|
Loading…
x
Reference in New Issue
Block a user