Fix build warnings on macos

2022-02-08 10:47:31 +01:00
parent c86380f8ff
commit fa825e9167
8 changed files with 2441 additions and 2441 deletions
--- a/Eule/Quaternion.cpp
+++ b/Eule/Quaternion.cpp
@@ -2,6 +2,7 @@
 #include "Constants.h"
 #include <algorithm>
 #include <functional>
+#include <cmath>

 //#define _EULE_NO_INTRINSICS_
 #ifndef _EULE_NO_INTRINSICS_
--- a/Eule/Vector2.cpp
+++ b/Eule/Vector2.cpp
@@ -1,720 +1,15 @@
 #include "Vector2.h"
-#include "Math.h"
-#include <iostream>
-
-//#define _EULE_NO_INTRINSICS_
-#ifndef _EULE_NO_INTRINSICS_
-#include <immintrin.h>
-#endif
-
-using namespace Eule;
-
-/*
-	NOTE:
-	Here you will find bad, unoptimized methods for T=int.
-	This is because the compiler needs a method for each type in each instantiation of the template!
-	I can't generalize the methods when heavily optimizing for doubles.
-	These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
-	The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus.
-*/
-
-// Good, optimized chad version for doubles
-template<>
-double Vector2<double>::DotProduct(const Vector2<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components into registers
-	__m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x);
-	__m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x);
-
-	// Define bitmask, and execute computation
-	const int mask = 0x31; // -> 0011 1000 -> use positions 0011 (last 2) of the vectors supplied, and place them in 1000 (first only) element of __dot
-	__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
-
-	// Retrieve result, and return it
-	float result[8];
-	_mm256_storeu_ps(result, __dot);
-
-	return result[0];
-
-	#else
-	return (x * other.x) +
-		   (y * other.y);
-	#endif
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector2<int>::DotProduct(const Vector2<int>& other) const
-{
-	int iDot = (x * other.x) +
-			   (y * other.y);
-
-	return (double)iDot;
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-double Vector2<double>::CrossProduct(const Vector2<double>& other) const
-{
-	return (x * other.y) -
-		   (y * other.x);
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector2<int>::CrossProduct(const Vector2<int>& other) const
-{
-	int iCross = (x * other.y) -
-				 (y * other.x);
-
-	return (double)iCross;
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-double Vector2<double>::SqrMagnitude() const
-{
-	// x.DotProduct(x) == x.SqrMagnitude()
-	return DotProduct(*this);
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector2<int>::SqrMagnitude() const
-{
-	int iSqrMag = x*x + y*y;
-	return (double)iSqrMag;
-}
-
-template<typename T>
-double Vector2<T>::Magnitude() const
-{
-	return sqrt(SqrMagnitude());
-}
-
-
-template<>
-Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Load vectors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x);
-
-	// Multiply them
-	__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
-
-	// Retrieve result
-	double result[4];
-	_mm256_storeu_pd(result, __product);
-
-	// Return value
-	return Vector2<double>(
-			result[0],
-			result[1]
-		);
-
-	#else
-
-	return Vector2<double>(
-			x * scalar.x,
-			y * scalar.y
-		);
-	#endif
-}
-
-template<>
-Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
-{
-	return Vector2<int>(
-			x * scalar.x,
-			y * scalar.y
-	);
-}
-
-
-template<typename T>
-Vector2<double> Vector2<T>::Normalize() const
-{
-	Vector2<double> norm(x, y);
-	norm.NormalizeSelf();
-
-	return norm;
-}
-
-// Method to normalize a Vector2d
-template<>
-void Vector2<double>::NormalizeSelf()
-{
-	double length = Magnitude();
-
-	// Prevent division by 0
-	if (length == 0)
-	{
-		x = 0;
-		y = 0;
-	}
-	else
-	{
-		#ifndef _EULE_NO_INTRINSICS_
-
-		// Load vector and length into registers
-		__m256d __vec = _mm256_set_pd(0, 0, y, x);
-		__m256d __len = _mm256_set1_pd(length);
-
-		// Divide
-		__m256d __prod = _mm256_div_pd(__vec, __len);
-
-		// Extract and set values
-		double prod[4];
-		_mm256_storeu_pd(prod, __prod);
-
-		x = prod[0];
-		y = prod[1];
-
-		#else
-
-		x /= length;
-		y /= length;
-
-		#endif
-	}
-
-	return;
-}
-
-// You can't normalize an int vector, ffs!
-// But we need an implementation for T=int
-template<>
-void Vector2<int>::NormalizeSelf()
-{
-	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
-	x = 0;
-	y = 0;
-
-	return;
-}
-
-
-// Good, optimized chad version for doubles
-template<>
-void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
-{
-	const double it = 1.0 - t; // Inverse t
-
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
-	__m256d __t = _mm256_set1_pd(t);
-	__m256d __it = _mm256_set1_pd(it); // Inverse t
-
-	// Procedure:
-	// (__vector_self * __it) + (__vector_other * __t)
-
-	__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
-
-	__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
-	__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
-
-	// Retrieve result, and apply it
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-	
-	#else
-
-	x = it * x + t * other.x;
-	y = it * y + t * other.y;
-
-	#endif
-	
-	return;
-}
-
-
-
-// Slow, lame version for intcels
-template<>
-void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
-{
-	const double it = 1.0 - t; // Inverse t
-
-	x = (int)(it * (double)x + t * (double)other.x);
-	y = (int)(it * (double)y + t * (double)other.y);
-
-	return;
-}
-
-template<>
-Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
-{
-	Vector2d copy(*this);
-	copy.LerpSelf(other, t);
-
-	return copy;
-}
-
-template<>
-Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
-{
-	Vector2d copy(this->ToDouble());
-	copy.LerpSelf(other.ToDouble(), t);
-
-	return copy;
-}
-
-
-
-template<typename T>
-T& Vector2<T>::operator[](std::size_t idx)
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	default:
-		throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
-	}
-}
-
-template<typename T>
-const T& Vector2<T>::operator[](std::size_t idx) const
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	default:
-		throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
-	}
-}
-
-template<typename T>
-bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
-{
-	return
-		(::Math::Similar(x, other.x, epsilon)) &&
-		(::Math::Similar(y, other.y, epsilon))
-	;
-}
-
-template<typename T>
-Vector2<int> Vector2<T>::ToInt() const
-{
-	return Vector2<int>((int)x, (int)y);
-}
-
-template<typename T>
-Vector2<double> Vector2<T>::ToDouble() const
-{
-	return Vector2<double>((double)x, (double)y);
-}
-
-template<>
-Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	return Vector2<double>(
-			sum[0],
-			sum[1]
-		);
-
-	#else
-
-	return Vector2<double>(
-		x + other.x,
-		y + other.y
-	);
-	#endif
-}
-
-template<typename T>
-Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
-{
-	return Vector2<T>(
-			x + other.x,
-			y + other.y
-		);
-}
-
-
-
-template<>
-void Vector2<double>::operator+=(const Vector2<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-
-	#else
-
-	x += other.x;
-	y += other.y;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector2<T>::operator+=(const Vector2<T>& other)
-{
-	x += other.x;
-	y += other.y;
-	return;
-}
-
-
-
-template<>
-Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	return Vector2<double>(
-			diff[0],
-			diff[1]
-		);
-
-	#else
-
-	return Vector2<double>(
-			x - other.x,
-			y - other.y
-		);
-	#endif
-}
-
-template<typename T>
-Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
-{
-	return Vector2<T>(
-		x - other.x,
-		y - other.y
-	);
-}
-
-
-
-template<>
-void Vector2<double>::operator-=(const Vector2<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	x = diff[0];
-	y = diff[1];
-
-	#else
-
-	x -= other.x;
-	y -= other.y;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector2<T>::operator-=(const Vector2<T>& other)
-{
-	x -= other.x;
-	y -= other.y;
-	return;
-}
-
-
-
-template<>
-Vector2<double> Vector2<double>::operator*(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector2<double>(
-			prod[0],
-			prod[1]
-		);
-
-	#else
-
-	return Vector2<double>(
-			x * scale,
-			y * scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector2<T> Vector2<T>::operator*(const T scale) const
-{
-	return Vector2<T>(
-		x * scale,
-		y * scale
-	);
-}
-
-
-
-template<>
-void Vector2<double>::operator*=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-
-	#else
-
-	x *= scale;
-	y *= scale;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector2<T>::operator*=(const T scale)
-{
-	x *= scale;
-	y *= scale;
-	return;
-}
-
-
-
-template<>
-Vector2<double> Vector2<double>::operator/(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector2<double>(
-		prod[0],
-		prod[1]
-	);
-
-	#else
-
-	return Vector2<double>(
-			x / scale,
-			y / scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector2<T> Vector2<T>::operator/(const T scale) const
-{
-	return Vector2<T>(
-			x / scale,
-			y / scale
-		);
-}
-
-
-
-template<>
-void Vector2<double>::operator/=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-
-	#else
-
-	x /= scale;
-	y /= scale;
-
-	#endif
-	return;
-}
-
-template<typename T>
-void Vector2<T>::operator/=(const T scale)
-{
-	x /= scale;
-	y /= scale;
-	return;
-}
-
-
-
-template<typename T>
-void Vector2<T>::operator=(const Vector2<T>& other)
-{
-	x = other.x;
-	y = other.y;
-
-	return;
-}
-
-template<typename T>
-void Vector2<T>::operator=(Vector2<T>&& other) noexcept
-{
-	x = std::move(other.x);
-	y = std::move(other.y);
-
-	return;
-}
-
-template<typename T>
-bool Vector2<T>::operator==(const Vector2<T>& other) const
-{
-	return
-		(x == other.x) &&
-		(y == other.y);
-}
-
-template<typename T>
-bool Vector2<T>::operator!=(const Vector2<T>& other) const
-{
-	return !operator==(other);
-}
-
-template<typename T>
-Vector2<T> Vector2<T>::operator-() const
-{
-	return Vector2<T>(
-		-x,
-		-y
-	);
-}
-
-// Don't want these includes above the other stuff
 #include "Vector3.h"
 #include "Vector4.h"
+
 template<typename T>
-Vector2<T>::operator Vector3<T>() const
+Eule::Vector2<T>::operator Eule::Vector3<T>() const
 {
 	return Vector3<T>(x, y, 0);
 }

 template<typename T>
-Vector2<T>::operator Vector4<T>() const
+Eule::Vector2<T>::operator Eule::Vector4<T>() const
 {
 	return Vector4<T>(x, y, 0, 0);
 }
-
-template class Vector2<int>;
-template class Vector2<double>;
-
-// Some handy predefines
-template <typename T>
-const Vector2<double> Vector2<T>::up(0, 1);
-template <typename T>
-const Vector2<double> Vector2<T>::down(0, -1);
-template <typename T>
-const Vector2<double> Vector2<T>::right(1, 0);
-template <typename T>
-const Vector2<double> Vector2<T>::left(-1, 0);
-template <typename T>
-const Vector2<double> Vector2<T>::one(1, 1);
-template <typename T>
-const Vector2<double> Vector2<T>::zero(0, 0);
--- a/Eule/Vector2.h
+++ b/Eule/Vector2.h
@@ -1,6 +1,22 @@
 #pragma once
 #include <cstdlib>
 #include <sstream>
+#include "Math.h"
+#include <iostream>
+
+//#define _EULE_NO_INTRINSICS_
+#ifndef _EULE_NO_INTRINSICS_
+#include <immintrin.h>
+#endif
+
+/*
+	NOTE:
+	Here you will find bad, unoptimized methods for T=int.
+	This is because the compiler needs a method for each type in each instantiation of the template!
+	I can't generalize the methods when heavily optimizing for doubles.
+	These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
+	The T=int instantiation only exists to store a pair of two ints, not to act as a vector in the sense of vector calculus.
+*/

 namespace Eule
 {
@@ -100,4 +116,690 @@ namespace Eule

 	typedef Vector2<int> Vector2i;
 	typedef Vector2<double> Vector2d;
+
+// T=double specialization (AVX intrinsics unless _EULE_NO_INTRINSICS_ is defined). NOTE(review): non-inline full specialization defined in a header -- verify it does not yield duplicate symbols across translation units
+template<>
+double Vector2<double>::DotProduct(const Vector2<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load x/y into elements 0/1 of a float register (remaining elements zeroed). The casts narrow to float, so this path computes in single precision
+	__m256 __vector_self = _mm256_set_ps(0,0,0,0,0,0, (float)y, (float)x);
+	__m256 __vector_other = _mm256_set_ps(0,0,0,0,0,0, (float)other.y, (float)other.x);
+
+	// Define bitmask, and execute computation
+	const int mask = 0x31; // -> 0011 0001 -> high nibble 0011: multiply elements 0 and 1; low nibble 0001: store the sum in element 0 only (applied per 128-bit lane)
+	__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
+
+	// Store all eight elements; the dot product is in element 0
+	float result[8];
+	_mm256_storeu_ps(result, __dot);
+
+	return result[0];
+
+	#else
+	return (x * other.x) +
+		   (y * other.y);
+	#endif
+}
+
+// T=int specialization: plain integer arithmetic, result converted to double
+template<>
+double Vector2<int>::DotProduct(const Vector2<int>& other) const
+{
+	int iDot = (x * other.x) +
+			   (y * other.y);
+
+	return (double)iDot;
+}
+
+
+
+// T=double specialization: scalar 2D cross product, x * other.y - y * other.x (no intrinsics used here)
+template<>
+double Vector2<double>::CrossProduct(const Vector2<double>& other) const
+{
+	return (x * other.y) -
+		   (y * other.x);
+}
+
+// T=int specialization: same formula in integer arithmetic, result converted to double
+template<>
+double Vector2<int>::CrossProduct(const Vector2<int>& other) const
+{
+	int iCross = (x * other.y) -
+				 (y * other.x);
+
+	return (double)iCross;
+}
+
+
+
+// T=double specialization: delegates to DotProduct
+template<>
+double Vector2<double>::SqrMagnitude() const
+{
+	// The squared magnitude of a vector equals its dot product with itself
+	return DotProduct(*this);
+}
+
+// T=int specialization: x*x + y*y in integer arithmetic, result converted to double
+template<>
+double Vector2<int>::SqrMagnitude() const
+{
+	int iSqrMag = x*x + y*y;
+	return (double)iSqrMag;
+}
+
+template<typename T>
+double Vector2<T>::Magnitude() const
+{
+	return sqrt(SqrMagnitude());
+}
+
+
+template<>
+Vector2<double> Vector2<double>::VectorScale(const Vector2<double>& scalar) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 of 256-bit registers (upper two elements zeroed)
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_scalar = _mm256_set_pd(0, 0, scalar.y, scalar.x);
+
+	// Multiply component-wise
+	__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
+
+	// Store all four elements; only the first two carry x and y
+	double result[4];
+	_mm256_storeu_pd(result, __product);
+
+	// Build the scaled vector from elements 0 and 1
+	return Vector2<double>(
+			result[0],
+			result[1]
+		);
+
+	#else
+
+	return Vector2<double>(
+			x * scalar.x,
+			y * scalar.y
+		);
+	#endif
+}
+
+template<>
+Vector2<int> Vector2<int>::VectorScale(const Vector2<int>& scalar) const
+{
+	return Vector2<int>(
+			x * scalar.x,
+			y * scalar.y
+	);
+}
+
+
+template<typename T>
+Vector2<double> Vector2<T>::Normalize() const
+{
+	Vector2<double> norm(x, y);
+	norm.NormalizeSelf();
+
+	return norm;
+}
+
+// Normalizes this Vector2d in place by dividing by its Magnitude(); a zero-length vector is set to (0, 0)
+template<>
+void Vector2<double>::NormalizeSelf()
+{
+	double length = Magnitude();
+
+	// Prevent division by 0
+	if (length == 0)
+	{
+		x = 0;
+		y = 0;
+	}
+	else
+	{
+		#ifndef _EULE_NO_INTRINSICS_
+
+		// Load the vector into elements 0/1 and broadcast the length to all elements
+		__m256d __vec = _mm256_set_pd(0, 0, y, x);
+		__m256d __len = _mm256_set1_pd(length);
+
+		// Divide component-wise (despite its name, __prod holds the quotient)
+		__m256d __prod = _mm256_div_pd(__vec, __len);
+
+		// Store all four elements and write the first two back to x and y
+		double prod[4];
+		_mm256_storeu_pd(prod, __prod);
+
+		x = prod[0];
+		y = prod[1];
+
+		#else
+
+		x /= length;
+		y /= length;
+
+		#endif
+	}
+
+	return;
+}
+
+// Normalizing an int vector is not meaningful: this writes a message to std::cerr and sets the vector to (0, 0).
+// An implementation is still needed for the T=int instantiation.
+template<>
+void Vector2<int>::NormalizeSelf()
+{
+	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
+	x = 0;
+	y = 0;
+
+	return;
+}
+
+
+// T=double specialization: in-place linear interpolation, this = (1 - t) * this + t * other
+template<>
+void Vector2<double>::LerpSelf(const Vector2<double>& other, double t)
+{
+	const double it = 1.0 - t; // Complement of t: the weight applied to this vector
+
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 and broadcast the two weights to all elements
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
+	__m256d __t = _mm256_set1_pd(t);
+	__m256d __it = _mm256_set1_pd(it); // Complement of t
+
+	// Procedure:
+	// (__vector_self * __it) + (__vector_other * __t)
+
+	__m256d __sum = _mm256_set1_pd(0); // Accumulator for the two fused multiply-adds, starting at zero
+
+	__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
+	__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
+
+	// Store all four elements and write the first two back to x and y
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	x = sum[0];
+	y = sum[1];
+	
+	#else
+
+	x = it * x + t * other.x;
+	y = it * y + t * other.y;
+
+	#endif
+	
+	return;
+}
+
+
+
+// T=int specialization: interpolates in double arithmetic, then converts each component back to int
+template<>
+void Vector2<int>::LerpSelf(const Vector2<int>& other, double t)
+{
+	const double it = 1.0 - t; // Complement of t: the weight applied to this vector
+
+	x = (int)(it * (double)x + t * (double)other.x);
+	y = (int)(it * (double)y + t * (double)other.y);
+
+	return;
+}
+
+template<>
+Vector2<double> Vector2<double>::Lerp(const Vector2<double>& other, double t) const
+{
+	Vector2d copy(*this);
+	copy.LerpSelf(other, t);
+
+	return copy;
+}
+
+template<>
+Vector2<double> Vector2<int>::Lerp(const Vector2<int>& other, double t) const
+{
+	Vector2d copy(this->ToDouble());
+	copy.LerpSelf(other.ToDouble(), t);
+
+	return copy;
+}
+
+
+
+template<typename T>
+T& Vector2<T>::operator[](std::size_t idx)
+{
+	switch (idx)
+	{
+	case 0:
+		return x;
+	case 1:
+		return y;
+	default:
+		throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
+	}
+}
+
+template<typename T>
+const T& Vector2<T>::operator[](std::size_t idx) const
+{
+	switch (idx)
+	{
+	case 0:
+		return x;
+	case 1:
+		return y;
+	default:
+		throw std::out_of_range("Array descriptor on Vector2<T> out of range!");
+	}
+}
+
+template<typename T>
+bool Vector2<T>::Similar(const Vector2<T>& other, double epsilon) const
+{
+	return
+		(::Eule::Math::Similar(x, other.x, epsilon)) &&
+		(::Eule::Math::Similar(y, other.y, epsilon))
+	;
+}
+
+template<typename T>
+Vector2<int> Vector2<T>::ToInt() const
+{
+	return Vector2<int>((int)x, (int)y);
+}
+
+template<typename T>
+Vector2<double> Vector2<T>::ToDouble() const
+{
+	return Vector2<double>((double)x, (double)y);
+}
+
+template<>
+Vector2<double> Vector2<double>::operator+(const Vector2<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 of 256-bit registers (upper two elements zeroed)
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
+
+	// Add component-wise
+	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
+
+	// Store all four elements; only the first two carry x and y
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	return Vector2<double>(
+			sum[0],
+			sum[1]
+		);
+
+	#else
+
+	return Vector2<double>(
+		x + other.x,
+		y + other.y
+	);
+	#endif
+}
+
+template<typename T>
+Vector2<T> Vector2<T>::operator+(const Vector2<T>& other) const
+{
+	return Vector2<T>(
+			x + other.x,
+			y + other.y
+		);
+}
+
+
+
+template<>
+void Vector2<double>::operator+=(const Vector2<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 of 256-bit registers (upper two elements zeroed)
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
+
+	// Add component-wise
+	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
+
+	// Store all four elements and write the first two back to x and y
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	x = sum[0];
+	y = sum[1];
+
+	#else
+
+	x += other.x;
+	y += other.y;
+
+	#endif
+
+	return;
+}
+
+template<typename T>
+void Vector2<T>::operator+=(const Vector2<T>& other)
+{
+	x += other.x;
+	y += other.y;
+	return;
+}
+
+
+
+template<>
+Vector2<double> Vector2<double>::operator-(const Vector2<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 of 256-bit registers (upper two elements zeroed)
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
+
+	// Subtract component-wise (self - other)
+	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
+
+	// Store all four elements; only the first two carry x and y
+	double diff[4];
+	_mm256_storeu_pd(diff, __diff);
+
+	return Vector2<double>(
+			diff[0],
+			diff[1]
+		);
+
+	#else
+
+	return Vector2<double>(
+			x - other.x,
+			y - other.y
+		);
+	#endif
+}
+
+template<typename T>
+Vector2<T> Vector2<T>::operator-(const Vector2<T>& other) const
+{
+	return Vector2<T>(
+		x - other.x,
+		y - other.y
+	);
+}
+
+
+
+template<>
+void Vector2<double>::operator-=(const Vector2<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load both vectors into elements 0/1 of 256-bit registers (upper two elements zeroed)
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, 0, other.y, other.x);
+
+	// Subtract component-wise (self - other)
+	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
+
+	// Store all four elements and write the first two back to x and y
+	double diff[4];
+	_mm256_storeu_pd(diff, __diff);
+
+	x = diff[0];
+	y = diff[1];
+
+	#else
+
+	x -= other.x;
+	y -= other.y;
+
+	#endif
+
+	return;
+}
+
+template<typename T>
+void Vector2<T>::operator-=(const Vector2<T>& other)
+{
+	x -= other.x;
+	y -= other.y;
+	return;
+}
+
+
+
+template<>
+Vector2<double> Vector2<double>::operator*(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load the vector into elements 0/1 and broadcast the scalar to all elements
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Multiply component-wise
+	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
+
+	// Store all four elements; only the first two carry x and y
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	return Vector2<double>(
+			prod[0],
+			prod[1]
+		);
+
+	#else
+
+	return Vector2<double>(
+			x * scale,
+			y * scale
+		);
+
+	#endif
+}
+
+template<typename T>
+Vector2<T> Vector2<T>::operator*(const T scale) const
+{
+	return Vector2<T>(
+		x * scale,
+		y * scale
+	);
+}
+
+
+
+template<>
+void Vector2<double>::operator*=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load the vector into elements 0/1 and broadcast the scalar to all elements
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Multiply component-wise
+	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
+
+	// Store all four elements and write the first two back to x and y
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	x = prod[0];
+	y = prod[1];
+
+	#else
+
+	x *= scale;
+	y *= scale;
+
+	#endif
+
+	return;
+}
+
+template<typename T>
+void Vector2<T>::operator*=(const T scale)
+{
+	x *= scale;
+	y *= scale;
+	return;
+}
+
+
+
+template<>
+Vector2<double> Vector2<double>::operator/(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load the vector into elements 0/1 and broadcast the divisor to all elements
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Divide component-wise (no guard against scale == 0; despite its name, __prod holds the quotient)
+	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
+
+	// Store all four elements; only the first two carry x and y
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	return Vector2<double>(
+		prod[0],
+		prod[1]
+	);
+
+	#else
+
+	return Vector2<double>(
+			x / scale,
+			y / scale
+		);
+
+	#endif
+}
+
+template<typename T>
+Vector2<T> Vector2<T>::operator/(const T scale) const
+{
+	return Vector2<T>(
+			x / scale,
+			y / scale
+		);
+}
+
+
+
+template<>
+void Vector2<double>::operator/=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Load the vector into elements 0/1 and broadcast the divisor to all elements
+	__m256d __vector_self = _mm256_set_pd(0, 0, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Divide component-wise (no guard against scale == 0; despite its name, __prod holds the quotient)
+	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
+
+	// Store all four elements and write the first two back to x and y
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	x = prod[0];
+	y = prod[1];
+
+	#else
+
+	x /= scale;
+	y /= scale;
+
+	#endif
+	return;
+}
+
+template<typename T>
+void Vector2<T>::operator/=(const T scale)
+{
+	x /= scale;
+	y /= scale;
+	return;
+}
+
+
+
+template<typename T>
+void Vector2<T>::operator=(const Vector2<T>& other)
+{
+	x = other.x;
+	y = other.y;
+
+	return;
+}
+
+template<typename T>
+void Vector2<T>::operator=(Vector2<T>&& other) noexcept
+{
+	x = std::move(other.x);
+	y = std::move(other.y);
+
+	return;
+}
+
+template<typename T>
+bool Vector2<T>::operator==(const Vector2<T>& other) const
+{
+	return
+		(x == other.x) &&
+		(y == other.y);
+}
+
+template<typename T>
+bool Vector2<T>::operator!=(const Vector2<T>& other) const
+{
+	return !operator==(other);
+}
+
+template<typename T>
+Vector2<T> Vector2<T>::operator-() const
+{
+	return Vector2<T>(
+		-x,
+		-y
+	);
+}
+
+template class Vector2<int>;
+template class Vector2<double>;
+
+// Definitions of the predefined constant vectors; each is a Vector2<double> regardless of T
+template <typename T>
+const Vector2<double> Vector2<T>::up(0, 1);
+template <typename T>
+const Vector2<double> Vector2<T>::down(0, -1);
+template <typename T>
+const Vector2<double> Vector2<T>::right(1, 0);
+template <typename T>
+const Vector2<double> Vector2<T>::left(-1, 0);
+template <typename T>
+const Vector2<double> Vector2<T>::one(1, 1);
+template <typename T>
+const Vector2<double> Vector2<T>::zero(0, 0);
 }
--- a/Eule/Vector3.cpp
+++ b/Eule/Vector3.cpp
@@ -1,928 +1,19 @@
 #include "Vector3.h"
-#include "Math.h"
-#include <iostream>

-//#define _EULE_NO_INTRINSICS_
-#ifndef _EULE_NO_INTRINSICS_
-#include <immintrin.h>
-#endif
-
-using namespace Eule;
-
-/*
-	NOTE:
-	Here you will find bad, unoptimized methods for T=int.
-	This is because the compiler needs a method for each type in each instantiation of the template!
-	I can't generalize the methods when heavily optimizing for doubles.
-	These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
-	The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus.
-*/
-
-// Good, optimized chad version for doubles
-template<>
-double Vector3<double>::DotProduct(const Vector3<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components into registers
-	__m256 __vector_self  = _mm256_set_ps(0,0,0,0,0, (float)z, (float)y, (float)x);
-	__m256 __vector_other = _mm256_set_ps(0,0,0,0,0, (float)other.z, (float)other.y, (float)other.x);
-
-	// Define bitmask, and execute computation
-	const int mask = 0x71; // -> 0111 1000 -> use positions 0111 (last 3) of the vectors supplied, and place them in 1000 (first only) element of __dot
-	__m256 __dot = _mm256_dp_ps(__vector_self, __vector_other, mask);
-
-	// Retrieve result, and return it
-	float result[8];
-	_mm256_storeu_ps(result, __dot);
-
-	return result[0];
-
-	#else
-	return (x * other.x) +
-		   (y * other.y) +
-		   (z * other.z);
-	#endif
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector3<int>::DotProduct(const Vector3<int>& other) const
-{
-	int iDot = (x * other.x) + (y * other.y) + (z * other.z);
-	return (double)iDot;
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-Vector3<double> Vector3<double>::CrossProduct(const Vector3<double>& other) const
-{
-	Vector3<double> cp;
-	cp.x = (y * other.z) - (z * other.y);
-	cp.y = (z * other.x) - (x * other.z);
-	cp.z = (x * other.y) - (y * other.x);
-
-	return cp;
-}
-
-// Slow, lame version for intcels
-template<>
-Vector3<double> Vector3<int>::CrossProduct(const Vector3<int>& other) const
-{
-	Vector3<double> cp;
-	cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y);
-	cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z);
-	cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x);
-
-	return cp;
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-double Vector3<double>::SqrMagnitude() const
-{
-	// x.DotProduct(x) == x.SqrMagnitude()
-	return DotProduct(*this);
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector3<int>::SqrMagnitude() const
-{
-	int iSqrMag = x*x + y*y + z*z;
-	return (double)iSqrMag;
-}
-
-template <typename T>
-double Vector3<T>::Magnitude() const
-{
-	return sqrt(SqrMagnitude());
-}
-
-
-
-template<>
-Vector3<double> Vector3<double>::VectorScale(const Vector3<double>& scalar) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-	
-	// Load vectors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x);
-
-	// Multiply them
-	__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
-
-	// Retrieve result
-	double result[4];
-	_mm256_storeu_pd(result, __product);
-
-	// Return value
-	return Vector3<double>(
-			result[0],
-			result[1],
-			result[2]
-		);
-
-	#else
-
-	return Vector3<double>(
-			x * scalar.x,
-			y * scalar.y,
-			z * scalar.z
-		);
-
-	#endif
-}
-
-template<>
-Vector3<int> Vector3<int>::VectorScale(const Vector3<int>& scalar) const
-{
-	return Vector3<int>(
-			x * scalar.x,
-			y * scalar.y,
-			z * scalar.z
-	);
-}
-
-
-
-template<typename T>
-Vector3<double> Vector3<T>::Normalize() const
-{
-	Vector3<double> norm(x, y, z);
-	norm.NormalizeSelf();
-
-	return norm;
-}
-
-// Method to normalize a Vector3d
-template<>
-void Vector3<double>::NormalizeSelf()
-{
-	const double length = Magnitude();
-
-	// Prevent division by 0
-	if (length == 0)
-	{
-		x = 0;
-		y = 0;
-		z = 0;
-	}
-	else
-	{
-		#ifndef _EULE_NO_INTRINSICS_
-
-		// Load vector and length into registers
-		__m256d __vec = _mm256_set_pd(0, z, y, x);
-		__m256d __len = _mm256_set1_pd(length);
-
-		// Divide
-		__m256d __prod = _mm256_div_pd(__vec, __len);
-
-		// Extract and set values
-		double prod[4];
-		_mm256_storeu_pd(prod, __prod);
-
-		x = prod[0];
-		y = prod[1];
-		z = prod[2];
-		
-		#else
-		
-		x /= length;
-		y /= length;
-		z /= length;
-		
-		#endif
-	}
-
-	return;
-}
-
-// You can't normalize an int vector, ffs!
-// But we need an implementation for T=int
-template<>
-void Vector3<int>::NormalizeSelf()
-{
-	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
-	x = 0;
-	y = 0;
-	z = 0;
-
-	return;
-}
-
-
-
-template<typename T>
-bool Vector3<T>::Similar(const Vector3<T>& other, double epsilon) const
-{
-	return
-		(::Math::Similar(x, other.x, epsilon)) &&
-		(::Math::Similar(y, other.y, epsilon)) &&
-		(::Math::Similar(z, other.z, epsilon))
-	;
-}
-
-template<typename T>
-Vector3<int> Vector3<T>::ToInt() const
-{
-	return Vector3<int>((int)x, (int)y, (int)z);
-}
-
-template<typename T>
-Vector3<double> Vector3<T>::ToDouble() const
-{
-	return Vector3<double>((double)x, (double)y, (double)z);
-}
-
-template<typename T>
-T& Vector3<T>::operator[](std::size_t idx)
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	case 2:
-		return z;
-	default:
-		throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
-	}
-}
-
-template<typename T>
-const T& Vector3<T>::operator[](std::size_t idx) const
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	case 2:
-		return z;
-	default:
-		throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
-	}
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-void Vector3<double>::LerpSelf(const Vector3<double>& other, double t)
-{
-	const double it = 1.0 - t; // Inverse t
-
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
-	__m256d __t = _mm256_set1_pd(t);
-	__m256d __it = _mm256_set1_pd(it); // Inverse t
-
-	// Procedure:
-	// (__vector_self * __it) + (__vector_other * __t)
-
-	__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
-
-	__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
-	__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
-
-	// Retrieve result, and apply it
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-	z = sum[2];
-
-	#else
-
-	x = it*x + t*other.x;
-	y = it*y + t*other.y;
-	z = it*z + t*other.z;
-
-	#endif
-
-	return;
-}
-
-
-
-// Slow, lame version for intcels
-template<>
-void Vector3<int>::LerpSelf(const Vector3<int>& other, double t)
-{
-	const double it = 1.0 - t; // Inverse t
-
-	x = (int)(it * (double)x + t * (double)other.x);
-	y = (int)(it * (double)y + t * (double)other.y);
-	z = (int)(it * (double)z + t * (double)other.z);
-
-	return;
-}
-
-template<>
-Vector3<double> Vector3<double>::Lerp(const Vector3<double>& other, double t) const
-{
-	Vector3d copy(*this);
-	copy.LerpSelf(other, t);
-
-	return copy;
-}
-
-template<>
-Vector3<double> Vector3<int>::Lerp(const Vector3<int>& other, double t) const
-{
-	Vector3d copy(this->ToDouble());
-	copy.LerpSelf(other.ToDouble(), t);
-
-	return copy;
-}
-
-
-
-template<>
-Vector3<double> Vector3<double>::operator+(const Vector3<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	return Vector3<double>(
-			sum[0],
-			sum[1],
-			sum[2]
-		);
-
-	#else
-
-	return Vector3<double>(
-		x + other.x,
-		y + other.y,
-		z + other.z
-	);
-	#endif
-}
-
-template<typename T>
-Vector3<T> Vector3<T>::operator+(const Vector3<T>& other) const
-{
-	return Vector3<T>(
-			x + other.x,
-			y + other.y,
-			z + other.z
-		);
-}
-
-
-
-template<>
-void Vector3<double>::operator+=(const Vector3<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-	z = sum[2];
-
-	#else
-
-	x += other.x;
-	y += other.y;
-	z += other.z;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector3<T>::operator+=(const Vector3<T>& other)
-{
-	x += other.x;
-	y += other.y;
-	z += other.z;
-	return;
-}
-
-
-
-template<>
-Vector3<double> Vector3<double>::operator-(const Vector3<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	return Vector3<double>(
-			diff[0],
-			diff[1],
-			diff[2]
-		);
-
-	#else
-
-	return Vector3<double>(
-			x - other.x,
-			y - other.y,
-			z - other.z
-		);
-	#endif
-}
-
-template<typename T>
-Vector3<T> Vector3<T>::operator-(const Vector3<T>& other) const
-{
-	return Vector3<T>(
-		x - other.x,
-		y - other.y,
-		z - other.z
-	);
-}
-
-
-
-template<>
-void Vector3<double>::operator-=(const Vector3<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	x = diff[0];
-	y = diff[1];
-	z = diff[2];
-
-	#else
-
-	x -= other.x;
-	y -= other.y;
-	z -= other.z;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector3<T>::operator-=(const Vector3<T>& other)
-{
-	x -= other.x;
-	y -= other.y;
-	z -= other.z;
-	return;
-}
-
-
-
-template<>
-Vector3<double> Vector3<double>::operator*(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector3<double>(
-			prod[0],
-			prod[1],
-			prod[2]
-		);
-
-	#else
-
-	return Vector3<double>(
-			x * scale,
-			y * scale,
-			z * scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector3<T> Vector3<T>::operator*(const T scale) const
-{
-	return Vector3<T>(
-		x * scale,
-		y * scale,
-		z * scale
-	);
-}
-
-
-
-template<>
-void Vector3<double>::operator*=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-	z = prod[2];
-
-	#else
-
-	x *= scale;
-	y *= scale;
-	z *= scale;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector3<T>::operator*=(const T scale)
-{
-	x *= scale;
-	y *= scale;
-	z *= scale;
-	return;
-}
-
-
-
-template<>
-Vector3<double> Vector3<double>::operator/(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector3<double>(
-		prod[0],
-		prod[1],
-		prod[2]
-	);
-
-	#else
-
-	return Vector3<double>(
-			x / scale,
-			y / scale,
-			z / scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector3<T> Vector3<T>::operator/(const T scale) const
-{
-	return Vector3<T>(
-			x / scale,
-			y / scale,
-			z / scale
-		);
-}
-
-
-
-template<>
-void Vector3<double>::operator/=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-	z = prod[2];
-
-	#else
-
-	x /= scale;
-	y /= scale;
-	z /= scale;
-
-	#endif
-	return;
-}
-
-template<typename T>
-void Vector3<T>::operator/=(const T scale)
-{
-	x /= scale;
-	y /= scale;
-	z /= scale;
-	return;
-}
-
-
-// Good, optimized chad version for doubles
-template<>
-Vector3<double> Vector3<double>::operator*(const Matrix4x4& mat) const
-{
-	Vector3<double> newVec;
-
-	#ifndef _EULE_NO_INTRINSICS_
-	// Store x, y, and z values
-	__m256d __vecx = _mm256_set1_pd(x);
-	__m256d __vecy = _mm256_set1_pd(y);
-	__m256d __vecz = _mm256_set1_pd(z);
-
-	// Store matrix values
-	__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
-	__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
-	__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
-
-	// Multiply x, y, z and matrix values
-	__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
-	__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
-	__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
-
-	// Sum up the products
-	__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
-
-	// Store translation values
-	__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
-
-	// Add the translation values
-	__m256d __final = _mm256_add_pd(__sum, __translation);
-
-	double dfinal[4];
-
-	_mm256_storeu_pd(dfinal, __final);
-
-	newVec.x = dfinal[3];
-	newVec.y = dfinal[2];
-	newVec.z = dfinal[1];
-
-	#else
-	// Rotation, Scaling
-	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
-	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
-	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
-
-	// Translation
-	newVec.x += mat[0][3];
-	newVec.y += mat[1][3];
-	newVec.z += mat[2][3];
-	#endif
-
-	return newVec;
-}
-
-// Slow, lame version for intcels
-template<>
-Vector3<int> Vector3<int>::operator*(const Matrix4x4& mat) const
-{
-	Vector3<double> newVec;
-
-	// Rotation, Scaling
-	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
-	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
-	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
-
-	// Translation
-	newVec.x += mat[0][3];
-	newVec.y += mat[1][3];
-	newVec.z += mat[2][3];
-
-	return Vector3<int>(
-		(int)newVec.x,
-		(int)newVec.y,
-		(int)newVec.z
-	);
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-void Vector3<double>::operator*=(const Matrix4x4& mat)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-	// Store x, y, and z values
-	__m256d __vecx = _mm256_set1_pd(x);
-	__m256d __vecy = _mm256_set1_pd(y);
-	__m256d __vecz = _mm256_set1_pd(z);
-
-	// Store matrix values
-	__m256d __mat_row0 = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
-	__m256d __mat_row1 = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
-	__m256d __mat_row2 = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
-
-	// Multiply x, y, z and matrix values
-	__m256d __mul_vecx_row0 = _mm256_mul_pd(__vecx, __mat_row0);
-	__m256d __mul_vecy_row1 = _mm256_mul_pd(__vecy, __mat_row1);
-	__m256d __mul_vecz_row2 = _mm256_mul_pd(__vecz, __mat_row2);
-
-	// Sum up the products
-	__m256d __sum = _mm256_add_pd(__mul_vecx_row0, _mm256_add_pd(__mul_vecy_row1, __mul_vecz_row2));
-
-	// Store translation values
-	__m256d __translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
-
-	// Add the translation values
-	__m256d __final = _mm256_add_pd(__sum, __translation);
-
-	double dfinal[4];
-
-	_mm256_storeu_pd(dfinal, __final);
-
-	x = dfinal[3];
-	y = dfinal[2];
-	z = dfinal[1];
-
-	#else
-	Vector3<double> buffer = *this;
-	x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z);
-	y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z);
-	z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z);
-	
-	// Translation
-	x += mat[0][3];
-	y += mat[1][3];
-	z += mat[2][3];
-	#endif
-
-	return;
-}
-
-template<typename T>
-Vector3<T> Vector3<T>::operator-() const
-{
-	return Vector3<T>(
-		-x,
-		-y,
-		-z
-	);
-}
-
-template<typename T>
-void Vector3<T>::operator=(const Vector3<T>& other)
-{
-	x = other.x;
-	y = other.y;
-	z = other.z;
-
-	return;
-}
-
-template<typename T>
-void Vector3<T>::operator=(Vector3<T>&& other) noexcept
-{
-	x = std::move(other.x);
-	y = std::move(other.y);
-	z = std::move(other.z);
-
-	return;
-}
-
-// Slow, lame version for intcels
-template<>
-void Vector3<int>::operator*=(const Matrix4x4& mat)
-{
-	Vector3<double> buffer(x, y, z);
-
-	x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z));
-	y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z));
-	z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z));
-
-	// Translation
-	x += (int)mat[0][3];
-	y += (int)mat[1][3];
-	z += (int)mat[2][3];
-
-	return;
-}
-
-
-
-template<typename T>
-bool Vector3<T>::operator==(const Vector3<T>& other) const
-{
-	return
-		(x == other.x) &&
-		(y == other.y) &&
-		(z == other.z);
-}
-
-template<typename T>
-bool Vector3<T>::operator!=(const Vector3<T>& other) const
-{
-	return !operator==(other);
-}


+#include "Vector3.h"
 #include "Vector2.h"
 #include "Vector4.h"
+
 template<typename T>
-Vector3<T>::operator Vector2<T>() const
+Eule::Vector3<T>::operator Eule::Vector2<T>() const
 {
 	return Vector2<T>(x, y);
 }

 template<typename T>
-Vector3<T>::operator Vector4<T>() const
+Eule::Vector3<T>::operator Eule::Vector4<T>() const
 {
 	return Vector4<T>(x, y, z, 0);
 }
-
-template class Vector3<int>;
-template class Vector3<double>;
-
-// Some handy predefines
-template <typename T>
-const Vector3<double> Vector3<T>::up(0, 1, 0);
-template <typename T>
-const Vector3<double> Vector3<T>::down(0, -1, 0);
-template <typename T>
-const Vector3<double> Vector3<T>::right(1, 0, 0);
-template <typename T>
-const Vector3<double> Vector3<T>::left(-1, 0, 0);
-template <typename T>
-const Vector3<double> Vector3<T>::forward(0, 0, 1);
-template <typename T>
-const Vector3<double> Vector3<T>::backward(0, 0, -1);
-template <typename T>
-const Vector3<double> Vector3<T>::one(1, 1, 1);
-template <typename T>
-const Vector3<double> Vector3<T>::zero(0, 0, 0);
--- a/Eule/Vector3.h
+++ b/Eule/Vector3.h
@@ -5,6 +5,23 @@
 #include <sstream>
 #include "Matrix4x4.h"

+#include "Math.h"
+#include <iostream>
+
+//#define _EULE_NO_INTRINSICS_
+#ifndef _EULE_NO_INTRINSICS_
+#include <immintrin.h>
+#endif
+
+/*
+	NOTE:
+	Here you will find bad, unoptimized methods for T=int.
+	This is because the compiler needs a definition of each method for every instantiation of the template!
+	The methods cannot be generalized while they are heavily optimized for doubles.
+	These functions will get called VERY rarely, if ever at all, for T=int, so that is acceptable.
+	The T=int instantiation only exists to store a triple of three ints, not so much to act as a vector in terms of vector calculus.
+*/
+
 namespace Eule
 {
 	template <typename T> class Vector2;
@@ -108,4 +125,899 @@ namespace Eule

 	typedef Vector3<int> Vector3i;
 	typedef Vector3<double> Vector3d;
+
+	// Good, optimized chad version for doubles
+template<>
+inline double Vector3<double>::DotProduct(const Vector3<double>& other) const
+{
+	// Dot product for T=double, evaluated entirely in double precision.
+	// inline: a full specialization defined in a header is an ordinary function and must be ODR-safe.
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack (x, y, z) into the three low lanes; the top lane is zero padding
+	__m256d __vector_self  = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+
+	// Lane-wise products, kept in double (no round trip through float)
+	__m256d __prod = _mm256_mul_pd(__vector_self, __vector_other);
+
+	// Spill the register and add the three meaningful lanes
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	return prod[0] + prod[1] + prod[2];
+
+	#else
+	return (x * other.x) +
+		   (y * other.y) +
+		   (z * other.z);
+	#endif
+}
+
+// Dot product for T=int. Products are formed in double so large components cannot overflow int.
+template<>
+inline double Vector3<int>::DotProduct(const Vector3<int>& other) const
+{
+	// inline: a full specialization defined in a header is an ordinary function and must be ODR-safe
+	return ((double)x * (double)other.x) + ((double)y * (double)other.y) + ((double)z * (double)other.z);
+}
+
+
+
+// Cross product (this x other) for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::CrossProduct(const Vector3<double>& other) const
+{
+	// Each output component is built from the other two axes of both operands
+	Vector3<double> cp;
+	cp.x = (y * other.z) - (z * other.y);
+	cp.y = (z * other.x) - (x * other.z);
+	cp.z = (x * other.y) - (y * other.x);
+	return cp;
+}
+
+// Cross product (this x other) for T=int. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<int>::CrossProduct(const Vector3<int>& other) const
+{
+	// Components are widened to double before multiplying; the result is a Vector3<double>
+	Vector3<double> cp;
+	cp.x = ((double)y * (double)other.z) - ((double)z * (double)other.y);
+	cp.y = ((double)z * (double)other.x) - ((double)x * (double)other.z);
+	cp.z = ((double)x * (double)other.y) - ((double)y * (double)other.x);
+	return cp;
+}
+
+
+
+// Squared length for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline double Vector3<double>::SqrMagnitude() const
+{
+	// The dot product of a vector with itself is its squared length
+	return DotProduct(*this);
+}
+
+// Squared length for T=int. Squares are formed in double so large components cannot overflow int.
+template<>
+inline double Vector3<int>::SqrMagnitude() const
+{
+	// inline: a full specialization defined in a header is an ordinary function and must be ODR-safe
+	return ((double)x * (double)x) + ((double)y * (double)y) + ((double)z * (double)z);
+}
+
+template <typename T>
+double Vector3<T>::Magnitude() const
+{
+	return sqrt(this->SqrMagnitude()); // length = square root of the squared length
+}
+
+
+
+// Component-wise product for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::VectorScale(const Vector3<double>& scalar) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack both vectors into the three low lanes; the top lane is zero padding
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_scalar = _mm256_set_pd(0, scalar.z, scalar.y, scalar.x);
+
+	// Lane-wise multiplication
+	__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
+
+	// Spill the register to memory
+	double result[4];
+	_mm256_storeu_pd(result, __product);
+
+	// Only the three low lanes carry vector components
+	return Vector3<double>(
+			result[0],
+			result[1],
+			result[2]
+		);
+
+	#else
+
+	return Vector3<double>(
+			x * scalar.x,
+			y * scalar.y,
+			z * scalar.z
+		);
+
+	#endif
+}
+
+// Component-wise product for T=int. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<int> Vector3<int>::VectorScale(const Vector3<int>& scalar) const
+{
+	return Vector3<int>(
+		x * scalar.x,
+		y * scalar.y,
+		z * scalar.z);
+}
+
+
+
+// Returns a normalized Vector3<double> built from this vector's components; *this is not modified.
+template <typename T>
+Vector3<double> Vector3<T>::Normalize() const
+{
+	Vector3<double> unit(x, y, z); // working copy in double
+	unit.NormalizeSelf();
+	return unit;
+}
+
+// Normalizes this Vector3d in place. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline void Vector3<double>::NormalizeSelf()
+{
+	const double length = Magnitude();
+
+	// A zero-length vector cannot be scaled to unit length: zero it instead of dividing by 0
+	if (length == 0)
+	{
+		x = 0;
+		y = 0;
+		z = 0;
+	}
+	else
+	{
+		#ifndef _EULE_NO_INTRINSICS_
+
+		// Pack (x, y, z) into the three low lanes and broadcast the length
+		__m256d __vec = _mm256_set_pd(0, z, y, x);
+		__m256d __len = _mm256_set1_pd(length);
+
+		// Lane-wise division by the length
+		__m256d __prod = _mm256_div_pd(__vec, __len);
+
+		// Spill the register and write the three low lanes back
+		double prod[4];
+		_mm256_storeu_pd(prod, __prod);
+
+		x = prod[0];
+		y = prod[1];
+		z = prod[2];
+
+		#else
+
+		x /= length;
+		y /= length;
+		z /= length;
+
+		#endif
+	}
+
+	return;
+}
+
+// An int vector cannot hold a unit-length result, but T=int still needs a definition:
+// it logs a complaint to std::cerr and zeroes the vector.
+template<>
+inline void Vector3<int>::NormalizeSelf()
+{
+	// inline: a full specialization defined in a header is an ordinary function and must be ODR-safe
+	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
+	x = 0;
+	y = 0;
+	z = 0;
+	return;
+}
+
+
+
+// True when every component pair passes ::Eule::Math::Similar with the given epsilon.
+template <typename T>
+bool Vector3<T>::Similar(const Vector3<T>& other, double epsilon) const
+{
+	// Guard clauses keep the short-circuit order: x, then y, then z
+	if (!::Eule::Math::Similar(x, other.x, epsilon)) return false;
+	if (!::Eule::Math::Similar(y, other.y, epsilon)) return false;
+	return ::Eule::Math::Similar(z, other.z, epsilon);
+}
+
+template <typename T>
+Vector3<int> Vector3<T>::ToInt() const
+{
+	return Vector3<int>(int(x), int(y), int(z)); // each component converted to int by a plain cast
+}
+
+template <typename T>
+Vector3<double> Vector3<T>::ToDouble() const
+{
+	return Vector3<double>(double(x), double(y), double(z)); // each component converted to double
+}
+
+// Mutable indexed access: 0 -> x, 1 -> y, 2 -> z.
+// Throws std::out_of_range for any other index.
+template <typename T>
+T& Vector3<T>::operator[](std::size_t idx)
+{
+	if (idx == 0)
+		return x;
+	if (idx == 1)
+		return y;
+	if (idx == 2)
+		return z;
+
+	// No component matches this index
+	throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
+}
+
+// Read-only indexed access: 0 -> x, 1 -> y, 2 -> z.
+// Throws std::out_of_range for any other index.
+template <typename T>
+const T& Vector3<T>::operator[](std::size_t idx) const
+{
+	if (idx == 0)
+		return x;
+	if (idx == 1)
+		return y;
+	if (idx == 2)
+		return z;
+
+	// No component matches this index
+	throw std::out_of_range("Array descriptor on Vector3<T> out of range!");
+}
+
+
+
+// In-place linear interpolation for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline void Vector3<double>::LerpSelf(const Vector3<double>& other, double t)
+{
+	const double it = 1.0 - t; // Inverse t
+
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack both vectors into the three low lanes and broadcast the two weights
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+	__m256d __t = _mm256_set1_pd(t);
+	__m256d __it = _mm256_set1_pd(it); // Inverse t
+
+	// Procedure: (__vector_self * __it) + (__vector_other * __t),
+	// accumulated into __sum by two fused multiply-adds
+
+	__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
+
+	__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
+	__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
+
+	// Spill the register and write the three low lanes back
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	x = sum[0];
+	y = sum[1];
+	z = sum[2];
+
+	#else
+
+	x = it*x + t*other.x;
+	y = it*y + t*other.y;
+	z = it*z + t*other.z;
+
+	#endif
+
+	return;
+}
+
+
+
+// In-place linear interpolation for T=int; the blend is computed in double and cast back to int.
+template<>
+inline void Vector3<int>::LerpSelf(const Vector3<int>& other, double t)
+{
+	// inline: a full specialization defined in a header is an ordinary function and must be ODR-safe
+	const double it = 1.0 - t; // Inverse t
+
+	x = (int)(it * (double)x + t * (double)other.x);
+	y = (int)(it * (double)y + t * (double)other.y);
+	z = (int)(it * (double)z + t * (double)other.z);
+	return;
+}
+
+// Interpolated copy for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::Lerp(const Vector3<double>& other, double t) const
+{
+	Vector3d copy(*this);
+	copy.LerpSelf(other, t);
+	return copy;
+}
+
+// Interpolated Vector3d from two int vectors. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<int>::Lerp(const Vector3<int>& other, double t) const
+{
+	Vector3d copy(this->ToDouble());
+	copy.LerpSelf(other.ToDouble(), t);
+	return copy;
+}
+
+
+
+// Vector addition for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::operator+(const Vector3<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack both operands into the three low lanes; the top lane is zero padding
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+
+	// Lane-wise addition
+	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
+
+	// Spill the register and return the three low lanes
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	return Vector3<double>(
+			sum[0],
+			sum[1],
+			sum[2]
+		);
+
+	#else
+
+	return Vector3<double>(
+		x + other.x,
+		y + other.y,
+		z + other.z
+	);
+	#endif
+}
+
+// Generic component-wise addition, built from T's own '+' operator.
+template <typename T>
+Vector3<T> Vector3<T>::operator+(const Vector3<T>& other) const
+{
+	const auto sumX = x + other.x;
+	const auto sumY = y + other.y;
+	const auto sumZ = z + other.z;
+	return Vector3<T>(sumX, sumY, sumZ);
+}
+
+
+
+// In-place vector addition for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline void Vector3<double>::operator+=(const Vector3<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack both operands into the three low lanes; the top lane is zero padding
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+
+	// Lane-wise addition
+	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
+
+	// Spill the register and write the three low lanes back
+	double sum[4];
+	_mm256_storeu_pd(sum, __sum);
+
+	x = sum[0];
+	y = sum[1];
+	z = sum[2];
+
+	#else
+
+	x += other.x;
+	y += other.y;
+	z += other.z;
+
+	#endif
+
+	return;
+}
+
+// Generic in-place addition: each component of other is added to this vector.
+template <typename T>
+void Vector3<T>::operator+=(const Vector3<T>& other)
+{
+	x = x + other.x;
+	y = y + other.y;
+	z = z + other.z;
+}
+
+
+
+// Vector subtraction for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::operator-(const Vector3<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack both operands into the three low lanes; the top lane is zero padding
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+
+	// Lane-wise subtraction (self - other)
+	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
+
+	// Spill the register and return the three low lanes
+	double diff[4];
+	_mm256_storeu_pd(diff, __diff);
+
+	return Vector3<double>(
+			diff[0],
+			diff[1],
+			diff[2]
+		);
+
+	#else
+
+	return Vector3<double>(
+			x - other.x,
+			y - other.y,
+			z - other.z
+		);
+	#endif
+}
+
+// Generic component-wise subtraction, built from T's own '-' operator.
+template <typename T>
+Vector3<T> Vector3<T>::operator-(const Vector3<T>& other) const
+{
+	const auto diffX = x - other.x;
+	const auto diffY = y - other.y;
+	const auto diffZ = z - other.z;
+	return Vector3<T>(diffX, diffY, diffZ);
+}
+
+
+
+// In-place vector subtraction for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline void Vector3<double>::operator-=(const Vector3<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack both operands into the three low lanes; the top lane is zero padding
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __vector_other = _mm256_set_pd(0, other.z, other.y, other.x);
+
+	// Lane-wise subtraction (self - other)
+	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
+
+	// Spill the register and write the three low lanes back
+	double diff[4];
+	_mm256_storeu_pd(diff, __diff);
+
+	x = diff[0];
+	y = diff[1];
+	z = diff[2];
+
+	#else
+
+	x -= other.x;
+	y -= other.y;
+	z -= other.z;
+
+	#endif
+
+	return;
+}
+
+// Generic in-place subtraction: each component of other is subtracted from this vector.
+template <typename T>
+void Vector3<T>::operator-=(const Vector3<T>& other)
+{
+	x = x - other.x;
+	y = y - other.y;
+	z = z - other.z;
+}
+
+
+
+// Scalar multiplication for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline Vector3<double> Vector3<double>::operator*(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack (x, y, z) into the three low lanes and broadcast the scalar to every lane
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Lane-wise multiplication
+	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
+
+	// Spill the register and return the three low lanes
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	return Vector3<double>(
+			prod[0],
+			prod[1],
+			prod[2]
+		);
+
+	#else
+
+	return Vector3<double>(
+			x * scale,
+			y * scale,
+			z * scale
+		);
+
+	#endif
+}
+
+// Generic scalar multiplication, built from T's own '*' operator.
+template <typename T>
+Vector3<T> Vector3<T>::operator*(const T scale) const
+{
+	const auto scaledX = x * scale;
+	const auto scaledY = y * scale;
+	const auto scaledZ = z * scale;
+	return Vector3<T>(scaledX, scaledY, scaledZ);
+}
+
+
+
+// In-place scalar multiplication for T=double. inline: header-defined full specialization must be ODR-safe.
+template<>
+inline void Vector3<double>::operator*=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Pack (x, y, z) into the three low lanes and broadcast the scalar to every lane
+	__m256d __vector_self = _mm256_set_pd(0, z, y, x);
+	__m256d __scalar = _mm256_set1_pd(scale);
+
+	// Lane-wise multiplication
+	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
+
+	// Spill the register and write the three low lanes back
+	double prod[4];
+	_mm256_storeu_pd(prod, __prod);
+
+	x = prod[0];
+	y = prod[1];
+	z = prod[2];
+
+	#else
+
+	x *= scale;
+	y *= scale;
+	z *= scale;
+
+	#endif
+
+	return;
+}
+
+// Generic in-place scalar multiplication: every component is multiplied by scale.
+template<typename T>
+void Vector3<T>::operator*=(const T scale)
+{
+	x = x * scale;
+	y = y * scale;
+	z = z * scale;
+}
+
+
+
+// Scalar division for double vectors: returns the divided copy, *this is left untouched.
+// Declared inline so the definition stays ODR-safe when it is compiled into more than one
+// translation unit (a full specialization is an ordinary function and is not implicitly inline).
+template<>
+inline Vector3<double> Vector3<double>::operator/(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and lane 3 is zero padding
+	const __m256d selfLanes = _mm256_set_pd(0, z, y, x);
+	const __m256d scaleLanes = _mm256_set1_pd(scale);
+
+	// Divide all four lanes at once
+	const __m256d quotientLanes = _mm256_div_pd(selfLanes, scaleLanes);
+
+	// Spill the register to memory; lanes 0..2 hold x, y, z
+	double quotient[4];
+	_mm256_storeu_pd(quotient, quotientLanes);
+
+	return Vector3<double>(quotient[0], quotient[1], quotient[2]);
+
+	#else
+
+	// Plain scalar fallback when intrinsics are disabled
+	return Vector3<double>(
+			x / scale,
+			y / scale,
+			z / scale
+		);
+
+	#endif
+}
+
+// Generic scalar division: builds a new vector from this vector's components divided by scale (no zero check).
+template<typename T>
+Vector3<T> Vector3<T>::operator/(const T scale) const
+{
+	const T quotX = x / scale;
+	const T quotY = y / scale;
+	const T quotZ = z / scale;
+	return Vector3<T>(quotX, quotY, quotZ);
+}
+
+
+
+// In-place scalar division for double vectors (inline keeps the definition ODR-safe across translation units).
+template<>
+inline void Vector3<double>::operator/=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and lane 3 is zero padding
+	const __m256d selfLanes = _mm256_set_pd(0, z, y, x);
+	const __m256d scaleLanes = _mm256_set1_pd(scale);
+
+	// Divide all four lanes at once
+	const __m256d quotientLanes = _mm256_div_pd(selfLanes, scaleLanes);
+
+	// Spill the register to memory; lanes 0..2 hold x, y, z
+	double quotient[4];
+	_mm256_storeu_pd(quotient, quotientLanes);
+
+	x = quotient[0];
+	y = quotient[1];
+	z = quotient[2];
+
+	#else
+
+	x /= scale;
+	y /= scale;
+	z /= scale;
+
+	#endif
+}
+
+// Generic in-place scalar division: every component is divided by scale (no zero check).
+template<typename T>
+void Vector3<T>::operator/=(const T scale)
+{
+	x = x / scale;
+	y = y / scale;
+	z = z / scale;
+}
+
+
+// Transforms this double vector by mat: mat[r][0..2] weight (x, y, z) and mat[r][3] is added as translation.
+// Declared inline so the definition stays ODR-safe when it is compiled into more than one translation unit.
+template<>
+inline Vector3<double> Vector3<double>::operator*(const Matrix4x4& mat) const
+{
+	Vector3<double> newVec;
+
+	#ifndef _EULE_NO_INTRINSICS_
+	// Broadcast x, y and z across a full register each
+	const __m256d xLanes = _mm256_set1_pd(x);
+	const __m256d yLanes = _mm256_set1_pd(y);
+	const __m256d zLanes = _mm256_set1_pd(z);
+
+	// Coefficients per component; set_pd lists lanes highest-first, so result row 0 sits in lane 3 and lane 0 is padding
+	const __m256d xCoeffs = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
+	const __m256d yCoeffs = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
+	const __m256d zCoeffs = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
+
+	// Weight each coefficient set by its component
+	const __m256d xTerms = _mm256_mul_pd(xLanes, xCoeffs);
+	const __m256d yTerms = _mm256_mul_pd(yLanes, yCoeffs);
+	const __m256d zTerms = _mm256_mul_pd(zLanes, zCoeffs);
+
+	// Sum up the products
+	const __m256d weighted = _mm256_add_pd(xTerms, _mm256_add_pd(yTerms, zTerms));
+
+	// Translation values, same lane layout
+	const __m256d translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
+
+	// Add the translation values
+	const __m256d transformed = _mm256_add_pd(weighted, translation);
+
+	// Memory order is lowest lane first: lane 3 -> x, lane 2 -> y, lane 1 -> z
+	double lanes[4];
+	_mm256_storeu_pd(lanes, transformed);
+
+	newVec.x = lanes[3];
+	newVec.y = lanes[2];
+	newVec.z = lanes[1];
+
+	#else
+	// Rotation, Scaling
+	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
+	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
+	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
+
+	// Translation
+	newVec.x += mat[0][3];
+	newVec.y += mat[1][3];
+	newVec.z += mat[2][3];
+	#endif
+	return newVec;
+}
+
+// Integer variant of the matrix transform: the arithmetic runs in double and the result is truncated toward zero.
+// Declared inline so the definition stays ODR-safe when it is compiled into more than one translation unit.
+template<>
+inline Vector3<int> Vector3<int>::operator*(const Matrix4x4& mat) const
+{
+	Vector3<double> newVec;
+
+	// Rotation, Scaling
+	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z);
+	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z);
+	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z);
+
+	// Translation
+	newVec.x += mat[0][3];
+	newVec.y += mat[1][3];
+	newVec.z += mat[2][3];
+
+	// Narrow back to int only after the full sum has been formed
+	return Vector3<int>(
+		static_cast<int>(newVec.x), static_cast<int>(newVec.y), static_cast<int>(newVec.z)
+	);
+}
+
+
+
+// In-place matrix transform of a double vector: mat[r][0..2] weight (x, y, z) and mat[r][3] is added as translation.
+// Declared inline so the definition stays ODR-safe when it is compiled into more than one translation unit.
+template<>
+inline void Vector3<double>::operator*=(const Matrix4x4& mat)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+	// Broadcast x, y and z across a full register each
+	const __m256d xLanes = _mm256_set1_pd(x);
+	const __m256d yLanes = _mm256_set1_pd(y);
+	const __m256d zLanes = _mm256_set1_pd(z);
+
+	// Coefficients per component; set_pd lists lanes highest-first, so result row 0 sits in lane 3 and lane 0 is padding
+	const __m256d xCoeffs = _mm256_set_pd(mat[0][0], mat[1][0], mat[2][0], 0);
+	const __m256d yCoeffs = _mm256_set_pd(mat[0][1], mat[1][1], mat[2][1], 0);
+	const __m256d zCoeffs = _mm256_set_pd(mat[0][2], mat[1][2], mat[2][2], 0);
+
+	// Weight each coefficient set by its component
+	const __m256d xTerms = _mm256_mul_pd(xLanes, xCoeffs);
+	const __m256d yTerms = _mm256_mul_pd(yLanes, yCoeffs);
+	const __m256d zTerms = _mm256_mul_pd(zLanes, zCoeffs);
+
+	// Sum up the products
+	const __m256d weighted = _mm256_add_pd(xTerms, _mm256_add_pd(yTerms, zTerms));
+
+	// Translation values, same lane layout
+	const __m256d translation = _mm256_set_pd(mat[0][3], mat[1][3], mat[2][3], 0);
+
+	// Add the translation values
+	const __m256d transformed = _mm256_add_pd(weighted, translation);
+
+	// Memory order is lowest lane first: lane 3 -> x, lane 2 -> y, lane 1 -> z
+	double lanes[4];
+	_mm256_storeu_pd(lanes, transformed);
+
+	x = lanes[3];
+	y = lanes[2];
+	z = lanes[1];
+
+	#else
+	// Snapshot the old components: y and z still need the original x after x has been overwritten
+	const Vector3<double> buffer = *this;
+	x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z);
+	y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z);
+	z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z);
+
+	// Translation
+	x += mat[0][3];
+	y += mat[1][3];
+	z += mat[2][3];
+	#endif
+}
+
+// Unary minus: yields a vector with the sign of every component flipped; *this is not modified.
+template<typename T>
+Vector3<T> Vector3<T>::operator-() const
+{
+	const T negX = -x;
+	const T negY = -y;
+	const T negZ = -z;
+	return Vector3<T>(negX, negY, negZ);
+}
+
+// Copy assignment: takes over the three components of other.
+// Returns void, so assignments cannot be chained (a = b = c does not compile).
+template<typename T>
+void Vector3<T>::operator=(const Vector3<T>& other)
+{
+	x = other.x;
+	y = other.y;
+	z = other.z;
+}
+
+// Move assignment. For int and double (the types instantiated in this file) moving a component
+// is the same as copying it, so other still holds its values afterwards.
+template<typename T>
+void Vector3<T>::operator=(Vector3<T>&& other) noexcept
+{
+	x = std::move(other.x);
+	y = std::move(other.y);
+	z = std::move(other.z);
+}
+
+// Integer variant of the in-place matrix transform.
+// Everything is accumulated in double and truncated exactly once, matching Vector3<int>::operator*,
+// so that v *= mat and v = v * mat always agree. Declared inline to stay ODR-safe across translation units.
+template<>
+inline void Vector3<int>::operator*=(const Matrix4x4& mat)
+{
+	// mat[r][0..2] weight (x, y, z); mat[r][3] is the translation. All three results are
+	// computed before any component is overwritten.
+	const double newX = ((mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z)) + mat[0][3];
+	const double newY = ((mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z)) + mat[1][3];
+	const double newZ = ((mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z)) + mat[2][3];
+
+	// Truncate toward zero when narrowing back to int
+	x = static_cast<int>(newX);
+	y = static_cast<int>(newY);
+	z = static_cast<int>(newZ);
+}
+
+
+
+// Exact component-wise equality (no tolerance is applied).
+template<typename T>
+bool Vector3<T>::operator==(const Vector3<T>& other) const
+{
+	if (!(x == other.x)) return false;
+	if (!(y == other.y)) return false;
+	return (z == other.z);
+}
+
+template<typename T>
+bool Vector3<T>::operator!=(const Vector3<T>& other) const
+{
+	return !(*this == other); // Inequality is defined as the negation of operator==
+}
+
+template class Vector3<int>; // Explicit instantiation for int components
+template class Vector3<double>; // Explicit instantiation for double components
+
+// Predefined direction and utility constants; each one is a Vector3<double>, whatever T the enclosing Vector3<T> uses
+template <typename T>
+const Vector3<double> Vector3<T>::up(0, 1, 0);
+template <typename T>
+const Vector3<double> Vector3<T>::down(0, -1, 0);
+template <typename T>
+const Vector3<double> Vector3<T>::right(1, 0, 0);
+template <typename T>
+const Vector3<double> Vector3<T>::left(-1, 0, 0);
+template <typename T>
+const Vector3<double> Vector3<T>::forward(0, 0, 1);
+template <typename T>
+const Vector3<double> Vector3<T>::backward(0, 0, -1);
+template <typename T>
+const Vector3<double> Vector3<T>::one(1, 1, 1);
+template <typename T>
+const Vector3<double> Vector3<T>::zero(0, 0, 0);
+
 }
--- a/Eule/Vector4.cpp
+++ b/Eule/Vector4.cpp
@@ -1,831 +1,15 @@
 #include "Vector4.h"
-#include "Math.h"
-#include <iostream>
-
-//#define _EULE_NO_INTRINSICS_
-#ifndef _EULE_NO_INTRINSICS_
-#include <immintrin.h>
-#endif
-
-using namespace Eule;
-
-/*
-	NOTE:
-	Here you will find bad, unoptimized methods for T=int.
-	This is because the compiler needs a method for each type in each instantiation of the template!
-	I can't generalize the methods when heavily optimizing for doubles.
-	These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
-	The T=int instantiation only exists to store a value-pair of two ints. Not so-much as a vector in terms of vector calculus.
-*/
-
-// Good, optimized chad version for doubles
-template<>
-double Vector4<double>::SqrMagnitude() const
-{
-	return (x * x) +
-		   (y * y) +
-		   (z * z) +
-		   (w * w);
-}
-
-// Slow, lame version for intcels
-template<>
-double Vector4<int>::SqrMagnitude() const
-{
-	int iSqrMag = x*x + y*y + z*z + w*w;
-	return (double)iSqrMag;
-}
-
-template<typename T>
-double Vector4<T>::Magnitude() const
-{
-	return sqrt(SqrMagnitude());
-}
-
-
-template<>
-Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Load vectors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_scalar = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);
-
-	// Multiply them
-	__m256d __product = _mm256_mul_pd(__vector_self, __vector_scalar);
-
-	// Retrieve result
-	double result[4];
-	_mm256_storeu_pd(result, __product);
-
-	// Return value
-	return Vector4<double>(
-		result[0],
-		result[1],
-		result[2],
-		result[3]
-		);
-
-	#else
-
-	return Vector4<double>(
-			x * scalar.x,
-			y * scalar.y,
-			z * scalar.z,
-			w * scalar.w
-		);
-	#endif
-}
-
-
-template<>
-Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
-{
-	return Vector4<int>(
-			x * scalar.x,
-			y * scalar.y,
-			z * scalar.z,
-			w * scalar.w
-		);
-}
-
-
-
-template<typename T>
-Vector4<double> Vector4<T>::Normalize() const
-{
-	Vector4<double> norm(x, y, z, w);
-	norm.NormalizeSelf();
-
-	return norm;
-}
-
-// Method to normalize a Vector4d
-template<>
-void Vector4<double>::NormalizeSelf()
-{
-	double length = Magnitude();
-
-	// Prevent division by 0
-	if (length == 0)
-	{
-		x = 0;
-		y = 0;
-		z = 0;
-		w = 0;
-	}
-	else
-	{
-		#ifndef _EULE_NO_INTRINSICS_
-
-		// Load vector and length into registers
-		__m256d __vec = _mm256_set_pd(w, z, y, x);
-		__m256d __len = _mm256_set1_pd(length);
-
-		// Divide
-		__m256d __prod = _mm256_div_pd(__vec, __len);
-
-		// Extract and set values
-		double prod[4];
-		_mm256_storeu_pd(prod, __prod);
-
-		x = prod[0];
-		y = prod[1];
-		z = prod[2];
-		w = prod[3];
-
-		#else
-
-		x /= length;
-		y /= length;
-		z /= length;
-		w /= length;
-
-		#endif
-	}
-
-	return;
-}
-
-// You can't normalize an int vector, ffs!
-// But we need an implementation for T=int
-template<>
-void Vector4<int>::NormalizeSelf()
-{
-	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
-	x = 0;
-	y = 0;
-	z = 0;
-	w = 0;
-
-	return;
-}
-
-
-
-template<typename T>
-bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
-{
-	return
-		(::Math::Similar(x, other.x, epsilon)) &&
-		(::Math::Similar(y, other.y, epsilon)) &&
-		(::Math::Similar(z, other.z, epsilon)) &&
-		(::Math::Similar(w, other.w, epsilon))
-	;
-}
-
-template<typename T>
-Vector4<int> Vector4<T>::ToInt() const
-{
-	return Vector4<int>((int)x, (int)y, (int)z, (int)w);
-}
-
-template<typename T>
-Vector4<double> Vector4<T>::ToDouble() const
-{
-	return Vector4<double>((double)x, (double)y, (double)z, (double)w);
-}
-
-template<typename T>
-T& Vector4<T>::operator[](std::size_t idx)
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	case 2:
-		return z;
-	case 3:
-		return w;
-	default:
-		throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
-	}
-}
-
-template<typename T>
-const T& Vector4<T>::operator[](std::size_t idx) const
-{
-	switch (idx)
-	{
-	case 0:
-		return x;
-	case 1:
-		return y;
-	case 2:
-		return z;
-	case 3:
-		return w;
-	default:
-		throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
-	}
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
-{
-	const double it = 1.0 - t; // Inverse t
-
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
-	__m256d __t = _mm256_set1_pd(t);
-	__m256d __it = _mm256_set1_pd(it); // Inverse t
-
-	// Procedure:
-	// (__vector_self * __it) + (__vector_other * __t)
-
-	__m256d __sum = _mm256_set1_pd(0); // this will hold the sum of the two multiplications
-
-	__sum = _mm256_fmadd_pd(__vector_self, __it, __sum);
-	__sum = _mm256_fmadd_pd(__vector_other, __t, __sum);
-
-	// Retrieve result, and apply it
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-	z = sum[2];
-	w = sum[3];
-
-	#else
-
-	x = it * x + t * other.x;
-	y = it * y + t * other.y;
-	z = it * z + t * other.z;
-	w = it * w + t * other.w;
-
-	#endif
-
-	return;
-}
-
-
-
-// Slow, lame version for intcels
-template<>
-void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
-{
-	const double it = 1.0 - t;
-
-	x = (int)(it * (double)x + t * (double)other.x);
-	y = (int)(it * (double)y + t * (double)other.y);
-	z = (int)(it * (double)z + t * (double)other.z);
-	w = (int)(it * (double)w + t * (double)other.w);
-
-	return;
-}
-
-template<>
-Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
-{
-	Vector4d copy(*this);
-	copy.LerpSelf(other, t);
-
-	return copy;
-}
-
-template<>
-Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
-{
-	Vector4d copy(this->ToDouble());
-	copy.LerpSelf(other.ToDouble(), t);
-
-	return copy;
-}
-
-
-
-template<>
-Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	return Vector4<double>(
-			sum[0],
-			sum[1],
-			sum[2],
-			sum[3]
-		);
-
-	#else
-
-	return Vector4<double>(
-		x + other.x,
-		y + other.y,
-		z + other.z,
-		w + other.w
-	);
-	#endif
-}
-
-template<typename T>
-Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
-{
-	return Vector4<T>(
-			x + other.x,
-			y + other.y,
-			z + other.z,
-			w + other.w
-		);
-}
-
-
-
-template<>
-void Vector4<double>::operator+=(const Vector4<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
-
-	// Add the components
-	__m256d __sum = _mm256_add_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double sum[4];
-	_mm256_storeu_pd(sum, __sum);
-
-	x = sum[0];
-	y = sum[1];
-	z = sum[2];
-	w = sum[3];
-
-	#else
-
-	x += other.x;
-	y += other.y;
-	z += other.z;
-	w += other.w;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector4<T>::operator+=(const Vector4<T>& other)
-{
-	x += other.x;
-	y += other.y;
-	z += other.z;
-	w += other.w;
-	return;
-}
-
-
-
-template<>
-Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and return these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	return Vector4<double>(
-			diff[0],
-			diff[1],
-			diff[2],
-			diff[3]
-		);
-
-	#else
-
-	return Vector4<double>(
-			x - other.x,
-			y - other.y,
-			z - other.z,
-			w - other.w
-		);
-	#endif
-}
-
-template<typename T>
-Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
-{
-	return Vector4<T>(
-		x - other.x,
-		y - other.y,
-		z - other.z,
-		w - other.w
-	);
-}
-
-
-
-template<>
-void Vector4<double>::operator-=(const Vector4<double>& other)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __vector_other = _mm256_set_pd(other.w, other.z, other.y, other.x);
-
-	// Subtract the components
-	__m256d __diff = _mm256_sub_pd(__vector_self, __vector_other);
-
-	// Retrieve and apply these values
-	double diff[4];
-	_mm256_storeu_pd(diff, __diff);
-
-	x = diff[0];
-	y = diff[1];
-	z = diff[2];
-	w = diff[3];
-
-	#else
-
-	x -= other.x;
-	y -= other.y;
-	z -= other.z;
-	w -= other.w;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector4<T>::operator-=(const Vector4<T>& other)
-{
-	x -= other.x;
-	y -= other.y;
-	z -= other.z;
-	w -= other.w;
-	return;
-}
-
-
-
-template<>
-Vector4<double> Vector4<double>::operator*(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector4<double>(
-			prod[0],
-			prod[1],
-			prod[2],
-			prod[3]
-		);
-
-	#else
-
-	return Vector4<double>(
-			x * scale,
-			y * scale,
-			z * scale,
-			w * scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector4<T> Vector4<T>::operator*(const T scale) const
-{
-	return Vector4<T>(
-		x * scale,
-		y * scale,
-		z * scale,
-		w * scale
-	);
-}
-
-
-
-template<>
-void Vector4<double>::operator*=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Multiply the components
-	__m256d __prod = _mm256_mul_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-	z = prod[2];
-	w = prod[3];
-
-	#else
-
-	x *= scale;
-	y *= scale;
-	z *= scale;
-	w *= scale;
-
-	#endif
-
-	return;
-}
-
-template<typename T>
-void Vector4<T>::operator*=(const T scale)
-{
-	x *= scale;
-	y *= scale;
-	z *= scale;
-	w *= scale;
-	return;
-}
-
-
-
-template<>
-Vector4<double> Vector4<double>::operator/(const double scale) const
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and return these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	return Vector4<double>(
-		prod[0],
-		prod[1],
-		prod[2],
-		prod[3]
-	);
-
-	#else
-
-	return Vector4<double>(
-			x / scale,
-			y / scale,
-			z / scale,
-			w / scale
-		);
-
-	#endif
-}
-
-template<typename T>
-Vector4<T> Vector4<T>::operator/(const T scale) const
-{
-	return Vector4<T>(
-			x / scale,
-			y / scale,
-			z / scale,
-			w / scale
-		);
-}
-
-
-
-template<>
-void Vector4<double>::operator/=(const double scale)
-{
-	#ifndef _EULE_NO_INTRINSICS_
-
-	// Move vector components and factors into registers
-	__m256d __vector_self = _mm256_set_pd(w, z, y, x);
-	__m256d __scalar = _mm256_set1_pd(scale);
-
-	// Divide the components
-	__m256d __prod = _mm256_div_pd(__vector_self, __scalar);
-
-	// Retrieve and apply these values
-	double prod[4];
-	_mm256_storeu_pd(prod, __prod);
-
-	x = prod[0];
-	y = prod[1];
-	z = prod[2];
-	w = prod[3];
-
-	#else
-
-	x /= scale;
-	y /= scale;
-	z /= scale;
-	w /= scale;
-
-	#endif
-	return;
-}
-
-template<typename T>
-void Vector4<T>::operator/=(const T scale)
-{
-	x /= scale;
-	y /= scale;
-	z /= scale;
-	w /= scale;
-	return;
-}
-
-
-
-template<typename T>
-bool Vector4<T>::operator==(const Vector4<T>& other) const
-{
-	return
-		(x == other.x) &&
-		(y == other.y) &&
-		(z == other.z) &&
-		(w == other.w);
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
-{
-	Vector4<double> newVec;
-	
-	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
-	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
-	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
-	newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
-
-	return newVec;
-}
-
-// Slow, lame version for intcels
-template<>
-Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
-{
-	Vector4<double> newVec;
-
-	newVec.x = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
-	newVec.y = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
-	newVec.z = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
-	newVec.w = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
-
-	return Vector4<int>(
-		(int)newVec.x,
-		(int)newVec.y,
-		(int)newVec.z,
-		(int)newVec.w
-	);
-}
-
-
-
-// Good, optimized chad version for doubles
-template<>
-void Vector4<double>::operator*=(const Matrix4x4& mat)
-{
-	Vector4<double> buffer = *this;
-
-	// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
-	// idk right now. check that if something doesn't work
-	x = (mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w);
-	y = (mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w);
-	z = (mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w);
-	w = (mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w);
-
-	return;
-}
-
-template<typename T>
-Vector4<T> Vector4<T>::operator-() const
-{
-	return Vector4<T>(
-		-x,
-		-y,
-		-z,
-		-w
-	);
-}
-
-template<typename T>
-void Vector4<T>::operator=(const Vector4<T>& other)
-{
-	x = other.x;
-	y = other.y;
-	z = other.z;
-	w = other.w;
-
-	return;
-}
-
-template<typename T>
-void Vector4<T>::operator=(Vector4<T>&& other) noexcept
-{
-	x = std::move(other.x);
-	y = std::move(other.y);
-	z = std::move(other.z);
-	w = std::move(other.w);
-
-	return;
-}
-
-// Slow, lame version for intcels
-template<>
-void Vector4<int>::operator*=(const Matrix4x4& mat)
-{
-	Vector4<double> buffer(x, y, z, w);
-
-	// Should this still be reversed...? like, instead of mat[x][y], use mat[y][m]
-	// idk right now. check that if something doesn't work
-	x = (int)((mat[0][0] * buffer.x) + (mat[0][1] * buffer.y) + (mat[0][2] * buffer.z) + (mat[0][3] * buffer.w));
-	y = (int)((mat[1][0] * buffer.x) + (mat[1][1] * buffer.y) + (mat[1][2] * buffer.z) + (mat[1][3] * buffer.w));
-	z = (int)((mat[2][0] * buffer.x) + (mat[2][1] * buffer.y) + (mat[2][2] * buffer.z) + (mat[2][3] * buffer.w));
-	w = (int)((mat[3][0] * buffer.x) + (mat[3][1] * buffer.y) + (mat[3][2] * buffer.z) + (mat[3][3] * buffer.w));
-
-	return;
-}
-
-template<typename T>
-bool Vector4<T>::operator!=(const Vector4<T>& other) const
-{
-	return !operator==(other);
-}
-
 #include "Vector2.h"
 #include "Vector3.h"
+
 template<typename T>
-Vector4<T>::operator Vector2<T>() const
+Eule::Vector4<T>::operator Eule::Vector2<T>() const
 {
 	return Vector2<T>(x, y);
 }

 template<typename T>
-Vector4<T>::operator Vector3<T>() const
+Eule::Vector4<T>::operator Eule::Vector3<T>() const
 {
 	return Vector3<T>(x, y, z);
 }
-
-template class Vector4<int>;
-template class Vector4<double>;
-
-// Some handy predefines
-template <typename T>
-const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::forward(1, 0, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::backward(-1, 0, 0, 0);
-template <typename T>
-const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
-template <typename T>
-const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
-template <typename T>
-const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
-template <typename T>
-const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
--- a/Eule/Vector4.h
+++ b/Eule/Vector4.h
@@ -5,6 +5,22 @@
 #include <sstream>
 #include "Matrix4x4.h"

+#include "Math.h"
+#include <iostream>
+
+//#define _EULE_NO_INTRINSICS_
+#ifndef _EULE_NO_INTRINSICS_
+#include <immintrin.h>
+#endif
+
+/*
+	NOTE:
+	Here you will find bad, unoptimized methods for T=int.
+	This is because the compiler needs a method for each type in each instantiation of the template!
+	I can't generalize the methods when heavily optimizing for doubles.
+	These functions will get called VERY rarely, if ever at all, for T=int, so it's ok.
+	The T=int instantiation only exists to store a tuple of four ints, not so much as a vector in terms of vector calculus.
+*/
 namespace Eule
 {
 	template <typename T> class Vector2;
@@ -105,4 +121,803 @@ namespace Eule

 	typedef Vector4<int> Vector4i;
 	typedef Vector4<double> Vector4d;
+
+	// Squared Euclidean length of a double vector; cheaper than Magnitude() because no square root is taken.
+	// Declared inline because this full specialization is defined in a header; otherwise every
+	// including translation unit would emit its own definition (one-definition rule).
+template<>
+inline double Vector4<double>::SqrMagnitude() const
+{
+	return (x * x) + (y * y) +
+		   (z * z) + (w * w);
+}
+
+// Squared length of an int vector. Components are widened to double before multiplying, so large
+// values cannot overflow int. Declared inline: header-defined full specialization (one-definition rule).
+template<>
+inline double Vector4<int>::SqrMagnitude() const
+{
+	return (static_cast<double>(x) * x) + (static_cast<double>(y) * y) + (static_cast<double>(z) * z) + (static_cast<double>(w) * w);
+}
+
+template<typename T>
+double Vector4<T>::Magnitude() const // Euclidean length: the square root of SqrMagnitude()
+{
+	return sqrt(this->SqrMagnitude());
+}
+
+
+// Component-wise product of two double vectors; returns the result, *this is unchanged.
+// Declared inline because this full specialization is defined in a header; otherwise every
+// including translation unit would emit its own definition (one-definition rule).
+template<>
+inline Vector4<double> Vector4<double>::VectorScale(const Vector4<double>& scalar) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and w in lane 3
+	const __m256d selfLanes = _mm256_set_pd(w, z, y, x);
+	const __m256d factorLanes = _mm256_set_pd(scalar.w, scalar.z, scalar.y, scalar.x);
+
+	// Multiply lane by lane
+	const __m256d productLanes = _mm256_mul_pd(selfLanes, factorLanes);
+
+	// Spill the register to memory; lanes 0..3 hold x, y, z, w
+	double product[4];
+	_mm256_storeu_pd(product, productLanes);
+
+	return Vector4<double>(
+		product[0], product[1], product[2], product[3]
+	);
+
+	#else
+
+	// Plain scalar fallback when intrinsics are disabled
+	return Vector4<double>(
+			x * scalar.x,
+			y * scalar.y,
+			z * scalar.z,
+			w * scalar.w
+		);
+	#endif
+}
+
+
+// Component-wise product of two int vectors, computed in plain integer arithmetic.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline Vector4<int> Vector4<int>::VectorScale(const Vector4<int>& scalar) const
+{
+	return Vector4<int>(
+		x * scalar.x, y * scalar.y,
+		z * scalar.z, w * scalar.w
+	);
+}
+
+
+
+// Returns a normalized double copy of this vector; *this stays unchanged.
+template<typename T>
+Vector4<double> Vector4<T>::Normalize() const
+{
+	Vector4<double> unit(x, y, z, w);
+	unit.NormalizeSelf();
+	return unit;
+}
+
+// Scales a double vector to unit length in place; a zero-length vector is left as all zeros.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline void Vector4<double>::NormalizeSelf()
+{
+	const double length = Magnitude();
+
+	// Prevent division by 0
+	if (length == 0)
+	{
+		x = 0;
+		y = 0;
+		z = 0;
+		w = 0;
+	}
+	else
+	{
+		#ifndef _EULE_NO_INTRINSICS_
+
+		// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and w in lane 3
+		const __m256d selfLanes = _mm256_set_pd(w, z, y, x);
+		const __m256d lengthLanes = _mm256_set1_pd(length);
+
+		// Divide every component by the length
+		const __m256d unitLanes = _mm256_div_pd(selfLanes, lengthLanes);
+
+		// Spill the register to memory; lanes 0..3 hold x, y, z, w
+		double unit[4];
+		_mm256_storeu_pd(unit, unitLanes);
+
+		x = unit[0];
+		y = unit[1];
+		z = unit[2];
+		w = unit[3];
+
+		#else
+
+		// Plain scalar fallback when intrinsics are disabled
+		x /= length;
+		y /= length;
+		z /= length;
+		w /= length;
+
+		#endif
+	}
+}
+
+// Normalizing an int vector is not meaningful, but T=int still needs a definition:
+// this one reports the misuse on std::cerr and zeroes the vector.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline void Vector4<int>::NormalizeSelf()
+{
+	std::cerr << "Stop normalizing int-vectors!!" << std::endl;
+
+	x = 0;
+	y = 0;
+	z = 0;
+	w = 0;
+}
+
+
+
+// Approximate equality: true only if every component pair passes ::Eule::Math::Similar
+// with the given epsilon. Components are checked in x, y, z, w order and the first failure ends the check.
+template<typename T>
+bool Vector4<T>::Similar(const Vector4<T>& other, double epsilon) const
+{
+	if (!(::Eule::Math::Similar(x, other.x, epsilon))) return false;
+	if (!(::Eule::Math::Similar(y, other.y, epsilon))) return false;
+	if (!(::Eule::Math::Similar(z, other.z, epsilon))) return false;
+	return (::Eule::Math::Similar(w, other.w, epsilon));
+}
+
+template<typename T>
+Vector4<int> Vector4<T>::ToInt() const // Component-wise conversion of x, y, z, w to int
+{
+	return Vector4<int>(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(w));
+}
+
+template<typename T>
+Vector4<double> Vector4<T>::ToDouble() const // Component-wise conversion of x, y, z, w to double
+{
+	return Vector4<double>(static_cast<double>(x), static_cast<double>(y), static_cast<double>(z), static_cast<double>(w));
+}
+
+// Mutable component access by position: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
+// Throws std::out_of_range for any other index.
+template<typename T>
+T& Vector4<T>::operator[](std::size_t idx)
+{
+	switch (idx)
+	{
+	case 0: return x;
+	case 1: return y;
+	case 2: return z;
+	case 3: return w;
+	default: break;
+	}
+
+	// Anything outside 0..3 does not name a component
+	throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
+}
+
+// Read-only component access by position: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
+// Throws std::out_of_range for any other index.
+template<typename T>
+const T& Vector4<T>::operator[](std::size_t idx) const
+{
+	switch (idx)
+	{
+	case 0: return x;
+	case 1: return y;
+	case 2: return z;
+	case 3: return w;
+	default: break;
+	}
+
+	// Anything outside 0..3 does not name a component
+	throw std::out_of_range("Array descriptor on Vector4<T> out of range!");
+}
+
+
+
+// Linearly interpolates this double vector towards other in place: t = 0 keeps *this, t = 1 yields other.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline void Vector4<double>::LerpSelf(const Vector4<double>& other, double t)
+{
+	const double it = 1.0 - t; // Inverse t: the weight kept by *this
+
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and w in lane 3
+	const __m256d selfLanes = _mm256_set_pd(w, z, y, x);
+	const __m256d otherLanes = _mm256_set_pd(other.w, other.z, other.y, other.x);
+	const __m256d tLanes = _mm256_set1_pd(t);
+	const __m256d itLanes = _mm256_set1_pd(it);
+
+	// Procedure:
+	// (selfLanes * itLanes) + (otherLanes * tLanes),
+	// built from two multiply-add steps on top of a zeroed accumulator
+	__m256d blendLanes = _mm256_set1_pd(0);
+
+	blendLanes = _mm256_fmadd_pd(selfLanes, itLanes, blendLanes);
+	blendLanes = _mm256_fmadd_pd(otherLanes, tLanes, blendLanes);
+
+	// Spill the register to memory; lanes 0..3 hold x, y, z, w
+	double blend[4];
+	_mm256_storeu_pd(blend, blendLanes);
+
+	x = blend[0];
+	y = blend[1];
+	z = blend[2];
+	w = blend[3];
+
+	#else
+
+	// Plain scalar fallback when intrinsics are disabled
+	x = it * x + t * other.x;
+	y = it * y + t * other.y;
+	z = it * z + t * other.z;
+	w = it * w + t * other.w;
+
+	#endif
+}
+
+
+
+// In-place linear interpolation for int vectors: the blend is computed in double per component.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline void Vector4<int>::LerpSelf(const Vector4<int>& other, double t)
+{
+	const double it = 1.0 - t;
+
+	// Each blended value is truncated toward zero when it is narrowed back to int
+	x = static_cast<int>(it * static_cast<double>(x) + t * static_cast<double>(other.x));
+	y = static_cast<int>(it * static_cast<double>(y) + t * static_cast<double>(other.y));
+	z = static_cast<int>(it * static_cast<double>(z) + t * static_cast<double>(other.z));
+	w = static_cast<int>(it * static_cast<double>(w) + t * static_cast<double>(other.w));
+}
+
+// Returns the blend of *this and other at parameter t, leaving both untouched (inline: header-defined full specialization).
+template<>
+inline Vector4<double> Vector4<double>::Lerp(const Vector4<double>& other, double t) const
+{
+	Vector4d blended(*this);
+	blended.LerpSelf(other, t);
+	return blended;
+}
+
+// Blends two int vectors in double precision and returns a double vector (inline: header-defined full specialization).
+template<>
+inline Vector4<double> Vector4<int>::Lerp(const Vector4<int>& other, double t) const
+{
+	Vector4d blended(this->ToDouble());
+	blended.LerpSelf(other.ToDouble(), t);
+	return blended;
+}
+
+
+
+// Component-wise sum of two double vectors; returns the result, *this is unchanged.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline Vector4<double> Vector4<double>::operator+(const Vector4<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and w in lane 3
+	const __m256d selfLanes = _mm256_set_pd(w, z, y, x);
+	const __m256d otherLanes = _mm256_set_pd(other.w, other.z, other.y, other.x);
+
+	// Add lane by lane
+	const __m256d totalLanes = _mm256_add_pd(selfLanes, otherLanes);
+
+	// Spill the register to memory; lanes 0..3 hold x, y, z, w
+	double total[4];
+	_mm256_storeu_pd(total, totalLanes);
+
+	return Vector4<double>(
+		total[0], total[1], total[2], total[3]
+	);
+
+	#else
+
+	// Plain scalar fallback when intrinsics are disabled
+	return Vector4<double>(
+		x + other.x,
+		y + other.y,
+		z + other.z,
+		w + other.w
+	);
+	#endif
+}
+
+// Generic component-wise addition: returns a new vector, *this is unchanged.
+template<typename T>
+Vector4<T> Vector4<T>::operator+(const Vector4<T>& other) const
+{
+	const T sumX = x + other.x;
+	const T sumY = y + other.y;
+	const T sumZ = z + other.z;
+	const T sumW = w + other.w;
+	return Vector4<T>(sumX, sumY, sumZ, sumW);
+}
+
+
+
+// In-place component-wise addition for double vectors.
+// Declared inline because this full specialization is defined in a header (one-definition rule).
+template<>
+inline void Vector4<double>::operator+=(const Vector4<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// _mm256_set_pd lists lanes highest-first, so x lands in lane 0 and w in lane 3
+	const __m256d selfLanes = _mm256_set_pd(w, z, y, x);
+	const __m256d otherLanes = _mm256_set_pd(other.w, other.z, other.y, other.x);
+
+	// Add lane by lane
+	const __m256d totalLanes = _mm256_add_pd(selfLanes, otherLanes);
+
+	// Spill the register to memory; lanes 0..3 hold x, y, z, w
+	double total[4];
+	_mm256_storeu_pd(total, totalLanes);
+
+	x = total[0];
+	y = total[1];
+	z = total[2];
+	w = total[3];
+
+	#else
+
+	x += other.x;
+	y += other.y;
+	z += other.z;
+	w += other.w;
+
+	#endif
+}
+
+// In-place component-wise sum (generic version; Vector4<double> has its own specialization)
+template<typename T>
+void Vector4<T>::operator+=(const Vector4<T>& other)
+{
+	this->x += other.x;
+	this->y += other.y;
+	this->z += other.z;
+	this->w += other.w;
+}
+
+
+
+// Component-wise difference (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined)
+template<>
+Vector4<double> Vector4<double>::operator-(const Vector4<double>& other) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack both operands into 256-bit registers.
+	// _mm256_set_pd takes the highest lane first, so x lands in lane 0.
+	const __m256d minuend = _mm256_set_pd(w, z, y, x);
+	const __m256d subtrahend = _mm256_set_pd(other.w, other.z, other.y, other.x);
+
+	// One packed subtract covers all four components; spill the lanes back to memory
+	double lanes[4];
+	_mm256_storeu_pd(lanes, _mm256_sub_pd(minuend, subtrahend));
+
+	return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
+
+	#else
+
+	// Scalar fallback
+	return Vector4<double>(x - other.x, y - other.y, z - other.z, w - other.w);
+
+	#endif
+}
+
+// Component-wise difference (generic version; Vector4<double> has its own specialization)
+template<typename T>
+Vector4<T> Vector4<T>::operator-(const Vector4<T>& other) const
+{
+	const T diffX = x - other.x;
+	const T diffY = y - other.y;
+	const T diffZ = z - other.z;
+	const T diffW = w - other.w;
+
+	return Vector4<T>(diffX, diffY, diffZ, diffW);
+}
+
+
+
+// In-place component-wise difference (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined)
+template<>
+void Vector4<double>::operator-=(const Vector4<double>& other)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack both operands into 256-bit registers (x in lane 0 ... w in lane 3)
+	const __m256d minuend = _mm256_set_pd(w, z, y, x);
+	const __m256d subtrahend = _mm256_set_pd(other.w, other.z, other.y, other.x);
+
+	// Packed subtract, then spill the four lanes back to memory
+	double lanes[4];
+	_mm256_storeu_pd(lanes, _mm256_sub_pd(minuend, subtrahend));
+
+	// Write the result back into this vector
+	x = lanes[0];
+	y = lanes[1];
+	z = lanes[2];
+	w = lanes[3];
+
+	#else
+
+	// Scalar fallback
+	x = x - other.x;
+	y = y - other.y;
+	z = z - other.z;
+	w = w - other.w;
+
+	#endif
+}
+
+// In-place component-wise difference (generic version; Vector4<double> has its own specialization)
+template<typename T>
+void Vector4<T>::operator-=(const Vector4<T>& other)
+{
+	this->x -= other.x;
+	this->y -= other.y;
+	this->z -= other.z;
+	this->w -= other.w;
+}
+
+
+
+// Scales every component by `scale` (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined)
+template<>
+Vector4<double> Vector4<double>::operator*(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack the components (x in lane 0 ... w in lane 3) and broadcast the factor to all lanes
+	const __m256d components = _mm256_set_pd(w, z, y, x);
+	const __m256d factor = _mm256_set1_pd(scale);
+
+	// One packed multiply covers all four components; spill the lanes back to memory
+	double lanes[4];
+	_mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
+
+	return Vector4<double>(lanes[0], lanes[1], lanes[2], lanes[3]);
+
+	#else
+
+	// Scalar fallback
+	return Vector4<double>(x * scale, y * scale, z * scale, w * scale);
+
+	#endif
+}
+
+// Scales every component by `scale` (generic version; Vector4<double> has its own specialization)
+template<typename T>
+Vector4<T> Vector4<T>::operator*(const T scale) const
+{
+	const T scaledX = x * scale;
+	const T scaledY = y * scale;
+	const T scaledZ = z * scale;
+	const T scaledW = w * scale;
+
+	return Vector4<T>(scaledX, scaledY, scaledZ, scaledW);
+}
+
+
+
+// In-place scaling by `scale` (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined)
+template<>
+void Vector4<double>::operator*=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack the components (x in lane 0 ... w in lane 3) and broadcast the factor to all lanes
+	const __m256d components = _mm256_set_pd(w, z, y, x);
+	const __m256d factor = _mm256_set1_pd(scale);
+
+	// Packed multiply, then spill the four lanes back to memory
+	double lanes[4];
+	_mm256_storeu_pd(lanes, _mm256_mul_pd(components, factor));
+
+	// Write the result back into this vector
+	x = lanes[0];
+	y = lanes[1];
+	z = lanes[2];
+	w = lanes[3];
+
+	#else
+
+	// Scalar fallback
+	x = x * scale;
+	y = y * scale;
+	z = z * scale;
+	w = w * scale;
+
+	#endif
+}
+
+// In-place scaling by `scale` (generic version; Vector4<double> has its own specialization)
+template<typename T>
+void Vector4<T>::operator*=(const T scale)
+{
+	this->x *= scale;
+	this->y *= scale;
+	this->z *= scale;
+	this->w *= scale;
+}
+
+
+
+// Divides every component by `scale` (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined).
+// No check is made for scale == 0.
+template<>
+Vector4<double> Vector4<double>::operator/(const double scale) const
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack the components (x in lane 0 ... w in lane 3) and broadcast the divisor to all lanes
+	const __m256d components = _mm256_set_pd(w, z, y, x);
+	const __m256d divisor = _mm256_set1_pd(scale);
+
+	// One packed divide covers all four components; spill the quotients back to memory
+	double quotients[4];
+	_mm256_storeu_pd(quotients, _mm256_div_pd(components, divisor));
+
+	return Vector4<double>(quotients[0], quotients[1], quotients[2], quotients[3]);
+
+	#else
+
+	// Scalar fallback
+	return Vector4<double>(x / scale, y / scale, z / scale, w / scale);
+
+	#endif
+}
+
+// Divides every component by `scale` (generic version; Vector4<double> has its own specialization).
+// No check is made for scale == 0.
+template<typename T>
+Vector4<T> Vector4<T>::operator/(const T scale) const
+{
+	const T quotX = x / scale;
+	const T quotY = y / scale;
+	const T quotZ = z / scale;
+	const T quotW = w / scale;
+
+	return Vector4<T>(quotX, quotY, quotZ, quotW);
+}
+
+
+
+// In-place division by `scale` (double specialization, AVX unless _EULE_NO_INTRINSICS_ is defined).
+// No check is made for scale == 0.
+template<>
+void Vector4<double>::operator/=(const double scale)
+{
+	#ifndef _EULE_NO_INTRINSICS_
+
+	// Pack the components (x in lane 0 ... w in lane 3) and broadcast the divisor to all lanes
+	const __m256d components = _mm256_set_pd(w, z, y, x);
+	const __m256d divisor = _mm256_set1_pd(scale);
+
+	// Packed divide, then spill the four quotients back to memory
+	double quotients[4];
+	_mm256_storeu_pd(quotients, _mm256_div_pd(components, divisor));
+
+	// Write the result back into this vector
+	x = quotients[0];
+	y = quotients[1];
+	z = quotients[2];
+	w = quotients[3];
+
+	#else
+
+	// Scalar fallback
+	x = x / scale;
+	y = y / scale;
+	z = z / scale;
+	w = w / scale;
+
+	#endif
+}
+
+// In-place division by `scale` (generic version; Vector4<double> has its own specialization).
+// No check is made for scale == 0.
+template<typename T>
+void Vector4<T>::operator/=(const T scale)
+{
+	this->x /= scale;
+	this->y /= scale;
+	this->z /= scale;
+	this->w /= scale;
+}
+
+
+
+// Exact component-wise equality (no epsilon tolerance, also for floating point T).
+// Components are checked in x, y, z, w order; the first mismatch decides.
+template<typename T>
+bool Vector4<T>::operator==(const Vector4<T>& other) const
+{
+	if (!(x == other.x))
+		return false;
+
+	if (!(y == other.y))
+		return false;
+
+	if (!(z == other.z))
+		return false;
+
+	return (w == other.w);
+}
+
+
+
+// Matrix * vector for doubles (plain scalar code, no intrinsics).
+// Output component i is the dot product of matrix row i with (x, y, z, w).
+template<>
+Vector4<double> Vector4<double>::operator*(const Matrix4x4& mat) const
+{
+	const double outX = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
+	const double outY = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
+	const double outZ = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
+	const double outW = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
+
+	return Vector4<double>(outX, outY, outZ, outW);
+}
+
+// Matrix * vector for ints.
+// Each row product is accumulated as a double and only converted to int
+// (truncating toward zero) when the result vector is built.
+template<>
+Vector4<int> Vector4<int>::operator*(const Matrix4x4& mat) const
+{
+	const double outX = (mat[0][0] * x) + (mat[0][1] * y) + (mat[0][2] * z) + (mat[0][3] * w);
+	const double outY = (mat[1][0] * x) + (mat[1][1] * y) + (mat[1][2] * z) + (mat[1][3] * w);
+	const double outZ = (mat[2][0] * x) + (mat[2][1] * y) + (mat[2][2] * z) + (mat[2][3] * w);
+	const double outW = (mat[3][0] * x) + (mat[3][1] * y) + (mat[3][2] * z) + (mat[3][3] * w);
+
+	return Vector4<int>(
+		static_cast<int>(outX),
+		static_cast<int>(outY),
+		static_cast<int>(outZ),
+		static_cast<int>(outW)
+	);
+}
+
+
+
+// In-place matrix * vector for doubles (plain scalar code, no intrinsics)
+template<>
+void Vector4<double>::operator*=(const Matrix4x4& mat)
+{
+	// Snapshot the current components: every output row needs all four old values
+	const double oldX = x;
+	const double oldY = y;
+	const double oldZ = z;
+	const double oldW = w;
+
+	// NOTE(review): the original author was unsure whether the indices should be
+	// transposed here (mat[col][row] instead of mat[row][col]). Verify if results look wrong.
+	x = (mat[0][0] * oldX) + (mat[0][1] * oldY) + (mat[0][2] * oldZ) + (mat[0][3] * oldW);
+	y = (mat[1][0] * oldX) + (mat[1][1] * oldY) + (mat[1][2] * oldZ) + (mat[1][3] * oldW);
+	z = (mat[2][0] * oldX) + (mat[2][1] * oldY) + (mat[2][2] * oldZ) + (mat[2][3] * oldW);
+	w = (mat[3][0] * oldX) + (mat[3][1] * oldY) + (mat[3][2] * oldZ) + (mat[3][3] * oldW);
+}
+
+// Unary minus: returns a new vector with every component negated
+template<typename T>
+Vector4<T> Vector4<T>::operator-() const
+{
+	const T negX = -x;
+	const T negY = -y;
+	const T negZ = -z;
+	const T negW = -w;
+
+	return Vector4<T>(negX, negY, negZ, negW);
+}
+
+// Copy assignment: takes over all four components of `other`.
+// Returns void, so assignments cannot be chained (a = b = c).
+template<typename T>
+void Vector4<T>::operator=(const Vector4<T>& other)
+{
+	this->x = other.x;
+	this->y = other.y;
+	this->z = other.z;
+	this->w = other.w;
+}
+
+// Move assignment: moves each component out of `other`.
+// Returns void, so assignments cannot be chained (a = b = c).
+template<typename T>
+void Vector4<T>::operator=(Vector4<T>&& other) noexcept
+{
+	this->x = std::move(other.x);
+	this->y = std::move(other.y);
+	this->z = std::move(other.z);
+	this->w = std::move(other.w);
+}
+
+// In-place matrix * vector for ints.
+// The math runs in double precision; each result is converted back to int (truncating toward zero).
+template<>
+void Vector4<int>::operator*=(const Matrix4x4& mat)
+{
+	// Snapshot the current components as doubles: every output row needs all four old values
+	const double oldX = x;
+	const double oldY = y;
+	const double oldZ = z;
+	const double oldW = w;
+
+	// NOTE(review): the original author was unsure whether the indices should be
+	// transposed here (mat[col][row] instead of mat[row][col]). Verify if results look wrong.
+	x = static_cast<int>((mat[0][0] * oldX) + (mat[0][1] * oldY) + (mat[0][2] * oldZ) + (mat[0][3] * oldW));
+	y = static_cast<int>((mat[1][0] * oldX) + (mat[1][1] * oldY) + (mat[1][2] * oldZ) + (mat[1][3] * oldW));
+	z = static_cast<int>((mat[2][0] * oldX) + (mat[2][1] * oldY) + (mat[2][2] * oldZ) + (mat[2][3] * oldW));
+	w = static_cast<int>((mat[3][0] * oldX) + (mat[3][1] * oldY) + (mat[3][2] * oldZ) + (mat[3][3] * oldW));
+}
+
+// Inequality: the exact negation of operator==
+template<typename T>
+bool Vector4<T>::operator!=(const Vector4<T>& other) const
+{
+	const bool isEqual = operator==(other);
+	return !isEqual;
+}
+
+// Some handy predefines.
+// Axis convention: x = right, y = up, z = forward, w = future.
+// These are defined BEFORE the explicit instantiations below, because an explicit
+// instantiation definition of a class template only instantiates the members that
+// have already been defined at that point.
+template <typename T>
+const Vector4<double> Vector4<T>::up(0, 1, 0, 0);
+template <typename T>
+const Vector4<double> Vector4<T>::down(0, -1, 0, 0);
+template <typename T>
+const Vector4<double> Vector4<T>::right(1, 0, 0, 0);
+template <typename T>
+const Vector4<double> Vector4<T>::left(-1, 0, 0, 0);
+// forward/backward run along the z axis. They used to be (+-1, 0, 0, 0),
+// which made them exact duplicates of right/left.
+template <typename T>
+const Vector4<double> Vector4<T>::forward(0, 0, 1, 0);
+template <typename T>
+const Vector4<double> Vector4<T>::backward(0, 0, -1, 0);
+template <typename T>
+const Vector4<double> Vector4<T>::future(0, 0, 0, 1);
+template <typename T>
+const Vector4<double> Vector4<T>::past(0, 0, 0, -1);
+template <typename T>
+const Vector4<double> Vector4<T>::one(1, 1, 1, 1);
+template <typename T>
+const Vector4<double> Vector4<T>::zero(0, 0, 0, 0);
+
+// Explicit instantiations: the member definitions live in this translation unit,
+// so the supported element types have to be emitted here.
+template class Vector4<int>;
+template class Vector4<double>;
+
 }
--- a/Eule/version.h
+++ b/Eule/version.h
@@ -1,2 +1,2 @@
 #pragma once
-#define EULE_VERSION (0.1)
+#define EULE_VERSION (0.11)