SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:31:02 -10:00
parent 0cdfd0ad9f
commit e1b29fda7a
11 changed files with 3003 additions and 596 deletions

View File

@ -1013,7 +1013,7 @@ template <class T>
static inline void __ReadProp(T& obj, athena::io::IStreamReader& r) static inline void __ReadProp(T& obj, athena::io::IStreamReader& r)
{ {
/* Read root 0xffffffff hash (hashed empty string) */ /* Read root 0xffffffff hash (hashed empty string) */
atUint32 hash = T::DNAEndian == Endian::Big ? r.readUint32Big() : r.readUint32Little(); T::DNAEndian == Endian::Big ? r.readUint32Big() : r.readUint32Little();
atInt64 size = T::DNAEndian == Endian::Big ? r.readUint16Big() : r.readUint16Little(); atInt64 size = T::DNAEndian == Endian::Big ? r.readUint16Big() : r.readUint16Little();
atInt64 start = r.position(); atInt64 start = r.position();
__Do<Read<PropType::CRC32>, T, T::DNAEndian>({}, obj, r); __Do<Read<PropType::CRC32>, T, T::DNAEndian>({}, obj, r);
@ -1038,7 +1038,7 @@ template <class T>
static inline void __ReadProp64(T& obj, athena::io::IStreamReader& r) static inline void __ReadProp64(T& obj, athena::io::IStreamReader& r)
{ {
/* Read root 0x0 hash (hashed empty string) */ /* Read root 0x0 hash (hashed empty string) */
atUint64 hash = T::DNAEndian == Endian::Big ? r.readUint64Big() : r.readUint64Little(); T::DNAEndian == Endian::Big ? r.readUint64Big() : r.readUint64Little();
atInt64 size = T::DNAEndian == Endian::Big ? r.readUint16Big() : r.readUint16Little(); atInt64 size = T::DNAEndian == Endian::Big ? r.readUint16Big() : r.readUint16Little();
atInt64 start = r.position(); atInt64 start = r.position();
__Do<Read<PropType::CRC64>, T, T::DNAEndian>({}, obj, r); __Do<Read<PropType::CRC64>, T, T::DNAEndian>({}, obj, r);

View File

@ -9,13 +9,12 @@ std::ostream& operator<<(std::ostream& os, Endian& endian);
class IStream class IStream
{ {
public: public:
IStream() {} virtual ~IStream() = default;
virtual ~IStream() {}
inline void setEndian(Endian endian) { m_endian = endian; } void setEndian(Endian endian) { m_endian = endian; }
inline Endian endian() const { return m_endian; } Endian endian() const { return m_endian; }
inline bool isBigEndian() const { return (m_endian == Endian::Big); } bool isBigEndian() const { return (m_endian == Endian::Big); }
inline bool isLittleEndian() const { return (m_endian == Endian::Little); } bool isLittleEndian() const { return (m_endian == Endian::Little); }
virtual void seek(atInt64, SeekOrigin) = 0; virtual void seek(atInt64, SeekOrigin) = 0;
virtual bool atEnd() const = 0; virtual bool atEnd() const = 0;
virtual atUint64 position() const = 0; virtual atUint64 position() const = 0;

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,8 @@ namespace athena::io
class IStreamWriter : public IStream class IStreamWriter : public IStream
{ {
public: public:
virtual ~IStreamWriter() {} virtual ~IStreamWriter() = default;
/** @brief Sets the buffers position relative to the specified position.<br /> /** @brief Sets the buffers position relative to the specified position.<br />
* It seeks relative to the current position by default. * It seeks relative to the current position by default.
* @param position where in the buffer to seek * @param position where in the buffer to seek
@ -21,11 +22,11 @@ public:
/** @brief Sets the buffers position relative to the next 32-byte aligned position.<br /> /** @brief Sets the buffers position relative to the next 32-byte aligned position.<br />
*/ */
inline void seekAlign32() {seek(ROUND_UP_32(position()), SeekOrigin::Begin);} void seekAlign32() {seek(ROUND_UP_32(position()), SeekOrigin::Begin);}
/** @brief Writes zero up to specified absolute offset.<br /> /** @brief Writes zero up to specified absolute offset.<br />
*/ */
inline void writeZeroTo(atInt64 pos) void writeZeroTo(atInt64 pos)
{ {
atInt64 delta = pos - position(); atInt64 delta = pos - position();
if (delta <= 0) if (delta <= 0)
@ -38,7 +39,7 @@ public:
* *
* @return True if at end; False otherwise. * @return True if at end; False otherwise.
*/ */
inline bool atEnd() const {return position() >= length();} bool atEnd() const {return position() >= length();}
/** @brief Returns the current position in the stream. /** @brief Returns the current position in the stream.
* *
@ -55,18 +56,18 @@ public:
/** @brief Writes a byte at the current position and advances the position by one byte. /** @brief Writes a byte at the current position and advances the position by one byte.
* @param val The value to write * @param val The value to write
*/ */
inline void writeUByte(atUint8 val) {writeUBytes(&val, 1);} void writeUByte(atUint8 val) {writeUBytes(&val, 1);}
inline void writeVal(atUint8 val) {writeUByte(val);} void writeVal(atUint8 val) {writeUByte(val);}
inline void writeValLittle(atUint8 val) {writeUByte(val);} void writeValLittle(atUint8 val) {writeUByte(val);}
inline void writeValBig(atUint8 val) {writeUByte(val);} void writeValBig(atUint8 val) {writeUByte(val);}
/** @brief Writes a byte at the current position and advances the position by one byte. /** @brief Writes a byte at the current position and advances the position by one byte.
* @param val The value to write * @param val The value to write
*/ */
inline void writeByte(atInt8 val) {writeUByte(val);} void writeByte(atInt8 val) {writeUByte(val);}
inline void writeVal(atInt8 val) {writeByte(val);} void writeVal(atInt8 val) {writeByte(val);}
inline void writeValLittle(atInt8 val) {writeByte(val);} void writeValLittle(atInt8 val) {writeByte(val);}
inline void writeValBig(atInt8 val) {writeByte(val);} void writeValBig(atInt8 val) {writeByte(val);}
/** @brief Writes the given buffer with the specified length, buffers can be bigger than the length /** @brief Writes the given buffer with the specified length, buffers can be bigger than the length
* however it's undefined behavior to try and write a buffer which is smaller than the given length. * however it's undefined behavior to try and write a buffer which is smaller than the given length.
@ -82,14 +83,14 @@ public:
* @param data The buffer to write * @param data The buffer to write
* @param length The amount to write * @param length The amount to write
*/ */
inline void writeBytes(const void* data, atUint64 len) {writeUBytes((atUint8*)data, len);} void writeBytes(const void* data, atUint64 len) {writeUBytes((atUint8*)data, len);}
/** @brief Writes an Int16 to the buffer and advances the buffer. /** @brief Writes an Int16 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt16(atInt16 val) void writeInt16(atInt16 val)
{ {
if (m_endian == Big) if (m_endian == Big)
utility::BigInt16(val); utility::BigInt16(val);
@ -97,62 +98,62 @@ public:
utility::LittleInt16(val); utility::LittleInt16(val);
writeUBytes((atUint8*)&val, 2); writeUBytes((atUint8*)&val, 2);
} }
inline void writeVal(atInt16 val) {writeInt16(val);} void writeVal(atInt16 val) {writeInt16(val);}
/** @brief Writes an Int16 to the buffer and advances the buffer. /** @brief Writes an Int16 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt16Little(atInt16 val) void writeInt16Little(atInt16 val)
{ {
utility::LittleInt16(val); utility::LittleInt16(val);
writeUBytes((atUint8*)&val, 2); writeUBytes((atUint8*)&val, 2);
} }
inline void writeValLittle(atInt16 val) {writeInt16Little(val);} void writeValLittle(atInt16 val) {writeInt16Little(val);}
/** @brief Writes an Int16 to the buffer and advances the buffer. /** @brief Writes an Int16 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt16Big(atInt16 val) void writeInt16Big(atInt16 val)
{ {
utility::BigInt16(val); utility::BigInt16(val);
writeUBytes((atUint8*)&val, 2); writeUBytes((atUint8*)&val, 2);
} }
inline void writeValBig(atInt16 val) {writeInt16Big(val);} void writeValBig(atInt16 val) {writeInt16Big(val);}
/** @brief Writes an Uint16 to the buffer and advances the buffer. /** @brief Writes an Uint16 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings * It also swaps the bytes depending on the platform and Stream settings
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint16(atUint16 val) {writeInt16(val);} void writeUint16(atUint16 val) {writeInt16(val);}
inline void writeVal(atUint16 val) {writeUint16(val);} void writeVal(atUint16 val) {writeUint16(val);}
/** @brief Writes an Uint16 to the buffer and advances the buffer. /** @brief Writes an Uint16 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform * It also swaps the bytes against little depending on the platform
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint16Little(atUint16 val) {writeInt16Little(val);} void writeUint16Little(atUint16 val) {writeInt16Little(val);}
inline void writeValLittle(atUint16 val) {writeUint16Little(val);} void writeValLittle(atUint16 val) {writeUint16Little(val);}
/** @brief Writes an Uint16 to the buffer and advances the buffer. /** @brief Writes an Uint16 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform * It also swaps the bytes against big depending on the platform
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint16Big(atUint16 val) {writeInt16Big(val);} void writeUint16Big(atUint16 val) {writeInt16Big(val);}
inline void writeValBig(atUint16 val) {writeUint16Big(val);} void writeValBig(atUint16 val) {writeUint16Big(val);}
/** @brief Writes an Int32 to the buffer and advances the buffer. /** @brief Writes an Int32 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt32(atInt32 val) void writeInt32(atInt32 val)
{ {
if (m_endian == Big) if (m_endian == Big)
utility::BigInt32(val); utility::BigInt32(val);
@ -160,62 +161,62 @@ public:
utility::LittleInt32(val); utility::LittleInt32(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeVal(atInt32 val) {writeInt32(val);} void writeVal(atInt32 val) {writeInt32(val);}
/** @brief Writes an Int32 to the buffer and advances the buffer. /** @brief Writes an Int32 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt32Little(atInt32 val) void writeInt32Little(atInt32 val)
{ {
utility::LittleInt32(val); utility::LittleInt32(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeValLittle(atInt32 val) {writeInt32Little(val);} void writeValLittle(atInt32 val) {writeInt32Little(val);}
/** @brief Writes an Int32 to the buffer and advances the buffer. /** @brief Writes an Int32 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt32Big(atInt32 val) void writeInt32Big(atInt32 val)
{ {
utility::BigInt32(val); utility::BigInt32(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeValBig(atInt32 val) {writeInt32Big(val);} void writeValBig(atInt32 val) {writeInt32Big(val);}
/** @brief Writes an Uint32 to the buffer and advances the buffer. /** @brief Writes an Uint32 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint32(atUint32 val) {writeInt32(val);} void writeUint32(atUint32 val) {writeInt32(val);}
inline void writeVal(atUint32 val) {writeUint32(val);} void writeVal(atUint32 val) {writeUint32(val);}
/** @brief Writes an Uint32 to the buffer and advances the buffer. /** @brief Writes an Uint32 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint32Little(atUint32 val) {writeInt32Little(val);} void writeUint32Little(atUint32 val) {writeInt32Little(val);}
inline void writeValLittle(atUint32 val) {writeUint32Little(val);} void writeValLittle(atUint32 val) {writeUint32Little(val);}
/** @brief Writes an Uint32 to the buffer and advances the buffer. /** @brief Writes an Uint32 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint32Big(atUint32 val) {writeInt32Big(val);} void writeUint32Big(atUint32 val) {writeInt32Big(val);}
inline void writeValBig(atUint32 val) {writeUint32Big(val);} void writeValBig(atUint32 val) {writeUint32Big(val);}
/** @brief Writes an Int64 to the buffer and advances the buffer. /** @brief Writes an Int64 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt64(atInt64 val) void writeInt64(atInt64 val)
{ {
if (m_endian == Big) if (m_endian == Big)
utility::BigInt64(val); utility::BigInt64(val);
@ -223,101 +224,101 @@ public:
utility::LittleInt64(val); utility::LittleInt64(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeVal(atInt64 val) {writeInt64(val);} void writeVal(atInt64 val) {writeInt64(val);}
/** @brief Writes an Int64 to the buffer and advances the buffer. /** @brief Writes an Int64 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt64Little(atInt64 val) void writeInt64Little(atInt64 val)
{ {
utility::LittleInt64(val); utility::LittleInt64(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeValLittle(atInt64 val) {writeInt64Little(val);} void writeValLittle(atInt64 val) {writeInt64Little(val);}
/** @brief Writes an Int64 to the buffer and advances the buffer. /** @brief Writes an Int64 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeInt64Big(atInt64 val) void writeInt64Big(atInt64 val)
{ {
utility::BigInt64(val); utility::BigInt64(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeValBig(atInt64 val) {writeInt64Big(val);} void writeValBig(atInt64 val) {writeInt64Big(val);}
/** @brief Writes an Uint64 to the buffer and advances the buffer. /** @brief Writes an Uint64 to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint64(atUint64 val) {writeInt64(val);} void writeUint64(atUint64 val) {writeInt64(val);}
inline void writeVal(atUint64 val) {writeUint64(val);} void writeVal(atUint64 val) {writeUint64(val);}
/** @brief Writes an Uint64 to the buffer and advances the buffer. /** @brief Writes an Uint64 to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint64Little(atUint64 val) {writeInt64Little(val);} void writeUint64Little(atUint64 val) {writeInt64Little(val);}
inline void writeValLittle(atUint64 val) {writeUint64Little(val);} void writeValLittle(atUint64 val) {writeUint64Little(val);}
/** @brief Writes an Uint64 to the buffer and advances the buffer. /** @brief Writes an Uint64 to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeUint64Big(atUint64 val) {writeInt64Big(val);} void writeUint64Big(atUint64 val) {writeInt64Big(val);}
inline void writeValBig(atUint64 val) {writeUint64Big(val);} void writeValBig(atUint64 val) {writeUint64Big(val);}
/** @brief Writes an float to the buffer and advances the buffer. /** @brief Writes an float to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeFloat(float val) void writeFloat(float val)
{ {
if (m_endian == Big) if (m_endian == Big)
utility::BigFloat(val); val = utility::BigFloat(val);
else else
utility::LittleFloat(val); val = utility::LittleFloat(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeVal(float val) {writeFloat(val);} void writeVal(float val) {writeFloat(val);}
/** @brief Writes an float to the buffer and advances the buffer. /** @brief Writes an float to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeFloatLittle(float val) void writeFloatLittle(float val)
{ {
utility::LittleFloat(val); utility::LittleFloat(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeValLittle(float val) {writeFloatLittle(val);} void writeValLittle(float val) {writeFloatLittle(val);}
/** @brief Writes an float to the buffer and advances the buffer. /** @brief Writes an float to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeFloatBig(float val) void writeFloatBig(float val)
{ {
utility::BigFloat(val); val = utility::BigFloat(val);
writeUBytes((atUint8*)&val, 4); writeUBytes((atUint8*)&val, 4);
} }
inline void writeValBig(float val) {writeFloatBig(val);} void writeValBig(float val) {writeFloatBig(val);}
/** @brief Writes an double to the buffer and advances the buffer. /** @brief Writes an double to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeDouble(double val) void writeDouble(double val)
{ {
if (m_endian == Big) if (m_endian == Big)
utility::BigDouble(val); utility::BigDouble(val);
@ -325,365 +326,365 @@ public:
utility::LittleDouble(val); utility::LittleDouble(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeVal(double val) {writeDouble(val);} void writeVal(double val) {writeDouble(val);}
/** @brief Writes an double to the buffer and advances the buffer. /** @brief Writes an double to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeDoubleLittle(double val) void writeDoubleLittle(double val)
{ {
utility::LittleDouble(val); utility::LittleDouble(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeValLittle(double val) {writeDoubleLittle(val);} void writeValLittle(double val) {writeDoubleLittle(val);}
/** @brief Writes an double to the buffer and advances the buffer. /** @brief Writes an double to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeDoubleBig(double val) void writeDoubleBig(double val)
{ {
utility::BigDouble(val); utility::BigDouble(val);
writeUBytes((atUint8*)&val, 8); writeUBytes((atUint8*)&val, 8);
} }
inline void writeValBig(double val) {writeDoubleBig(val);} void writeValBig(double val) {writeDoubleBig(val);}
/** @brief Writes an bool to the buffer and advances the buffer. /** @brief Writes an bool to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param val The value to write to the buffer * @param val The value to write to the buffer
*/ */
inline void writeBool(bool val) {writeUBytes((atUint8*)&val, 1);} void writeBool(bool val) {writeUBytes((atUint8*)&val, 1);}
inline void writeVal(bool val) {writeBool(val);} void writeVal(bool val) {writeBool(val);}
inline void writeValLittle(bool val) {writeBool(val);} void writeValLittle(bool val) {writeBool(val);}
inline void writeValBig(bool val) {writeBool(val);} void writeValBig(bool val) {writeBool(val);}
/** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2f(const atVec2f& vec) void writeVec2f(const atVec2f& vec)
{ {
atVec2f tmp = vec; simd_floats tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
} }
else else
{ {
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
} }
writeUBytes((atUint8*)&tmp, 8); writeUBytes((atUint8*)tmp.data(), 8);
} }
inline void writeVal(const atVec2f& val) {writeVec2f(val);} void writeVal(const atVec2f& val) {writeVec2f(val);}
/** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2fLittle(const atVec2f& vec) void writeVec2fLittle(const atVec2f& vec)
{ {
atVec2f tmp = vec; simd_floats tmp(vec.simd);
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
writeUBytes((atUint8*)&tmp, 8); writeUBytes((atUint8*)tmp.data(), 8);
} }
inline void writeValLittle(const atVec2f& val) {writeVec2fLittle(val);} void writeValLittle(const atVec2f& val) {writeVec2fLittle(val);}
/** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2f (8 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2fBig(const atVec2f& vec) void writeVec2fBig(const atVec2f& vec)
{ {
atVec2f tmp = vec; simd_floats tmp(vec.simd);
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
writeUBytes((atUint8*)&tmp, 8); writeUBytes((atUint8*)tmp.data(), 8);
} }
inline void writeValBig(const atVec2f& val) {writeVec2fBig(val);} void writeValBig(const atVec2f& val) {writeVec2fBig(val);}
/** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3f(const atVec3f& vec) void writeVec3f(const atVec3f& vec)
{ {
atVec3f tmp = vec; simd_floats tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
utility::BigFloat(tmp.vec[2]); tmp[2] = utility::BigFloat(tmp[2]);
} }
else else
{ {
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
utility::LittleFloat(tmp.vec[2]); tmp[2] = utility::LittleFloat(tmp[2]);
} }
writeUBytes((atUint8*)&tmp, 12); writeUBytes((atUint8*)tmp.data(), 12);
} }
inline void writeVal(const atVec3f& val) {writeVec3f(val);} void writeVal(const atVec3f& val) {writeVec3f(val);}
/** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3fLittle(const atVec3f& vec) void writeVec3fLittle(const atVec3f& vec)
{ {
atVec3f tmp = vec; simd_floats tmp(vec.simd);
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
utility::LittleFloat(tmp.vec[2]); tmp[2] = utility::LittleFloat(tmp[2]);
writeUBytes((atUint8*)&tmp, 12); writeUBytes((atUint8*)tmp.data(), 12);
} }
inline void writeValLittle(const atVec3f& val) {writeVec3fLittle(val);} void writeValLittle(const atVec3f& val) {writeVec3fLittle(val);}
/** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3f (12 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3fBig(const atVec3f& vec) void writeVec3fBig(const atVec3f& vec)
{ {
atVec3f tmp = vec; simd_floats tmp(vec.simd);
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
utility::BigFloat(tmp.vec[2]); tmp[2] = utility::BigFloat(tmp[2]);
writeUBytes((atUint8*)&tmp, 12); writeUBytes((atUint8*)tmp.data(), 12);
} }
inline void writeValBig(const atVec3f& val) {writeVec3fBig(val);} void writeValBig(const atVec3f& val) {writeVec3fBig(val);}
/** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4f(const atVec4f& vec) void writeVec4f(const atVec4f& vec)
{ {
atVec4f tmp = vec; simd_floats tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
utility::BigFloat(tmp.vec[2]); tmp[2] = utility::BigFloat(tmp[2]);
utility::BigFloat(tmp.vec[3]); tmp[3] = utility::BigFloat(tmp[3]);
} }
else else
{ {
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
utility::LittleFloat(tmp.vec[2]); tmp[2] = utility::LittleFloat(tmp[2]);
utility::LittleFloat(tmp.vec[3]); tmp[3] = utility::LittleFloat(tmp[3]);
} }
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeVal(const atVec4f& val) {writeVec4f(val);} void writeVal(const atVec4f& val) {writeVec4f(val);}
/** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4fLittle(const atVec4f& vec) void writeVec4fLittle(const atVec4f& vec)
{ {
atVec4f tmp = vec; simd_floats tmp(vec.simd);
utility::LittleFloat(tmp.vec[0]); tmp[0] = utility::LittleFloat(tmp[0]);
utility::LittleFloat(tmp.vec[1]); tmp[1] = utility::LittleFloat(tmp[1]);
utility::LittleFloat(tmp.vec[2]); tmp[2] = utility::LittleFloat(tmp[2]);
utility::LittleFloat(tmp.vec[3]); tmp[3] = utility::LittleFloat(tmp[3]);
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeValLittle(const atVec4f& val) {writeVec4fLittle(val);} void writeValLittle(const atVec4f& val) {writeVec4fLittle(val);}
/** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4f (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4fBig(const atVec4f& vec) void writeVec4fBig(const atVec4f& vec)
{ {
atVec4f tmp = vec; simd_floats tmp(vec.simd);
utility::BigFloat(tmp.vec[0]); tmp[0] = utility::BigFloat(tmp[0]);
utility::BigFloat(tmp.vec[1]); tmp[1] = utility::BigFloat(tmp[1]);
utility::BigFloat(tmp.vec[2]); tmp[2] = utility::BigFloat(tmp[2]);
utility::BigFloat(tmp.vec[3]); tmp[3] = utility::BigFloat(tmp[3]);
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeValBig(const atVec4f& val) {writeVec4fBig(val);} void writeValBig(const atVec4f& val) {writeVec4fBig(val);}
/** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2d(const atVec2d& vec) void writeVec2d(const atVec2d& vec)
{ {
atVec2d tmp = vec; simd_doubles tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
} }
else else
{ {
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
} }
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeVal(const atVec2d& val) {writeVec2d(val);} void writeVal(const atVec2d& val) {writeVec2d(val);}
/** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2dLittle(const atVec2d& vec) void writeVec2dLittle(const atVec2d& vec)
{ {
atVec2d tmp = vec; simd_doubles tmp(vec.simd);
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeValLittle(const atVec2d& val) {writeVec2dLittle(val);} void writeValLittle(const atVec2d& val) {writeVec2dLittle(val);}
/** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec2d (16 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec2dBig(const atVec2d& vec) void writeVec2dBig(const atVec2d& vec)
{ {
atVec2d tmp = vec; simd_doubles tmp(vec.simd);
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
writeUBytes((atUint8*)&tmp, 16); writeUBytes((atUint8*)tmp.data(), 16);
} }
inline void writeValBig(const atVec2d& val) {writeVec2dBig(val);} void writeValBig(const atVec2d& val) {writeVec2dBig(val);}
/** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3d(const atVec3d& vec) void writeVec3d(const atVec3d& vec)
{ {
atVec3d tmp = vec; simd_doubles tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
utility::BigDouble(tmp.vec[2]); tmp[2] = utility::BigDouble(tmp[2]);
} }
else else
{ {
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
utility::LittleDouble(tmp.vec[2]); tmp[2] = utility::LittleDouble(tmp[2]);
} }
writeUBytes((atUint8*)&tmp, 24); writeUBytes((atUint8*)tmp.data(), 24);
} }
inline void writeVal(const atVec3d& val) {writeVec3d(val);} void writeVal(const atVec3d& val) {writeVec3d(val);}
/** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3dLittle(const atVec3d& vec) void writeVec3dLittle(const atVec3d& vec)
{ {
atVec3d tmp = vec; simd_doubles tmp(vec.simd);
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
utility::LittleDouble(tmp.vec[2]); tmp[2] = utility::LittleDouble(tmp[2]);
writeUBytes((atUint8*)&tmp, 24); writeUBytes((atUint8*)tmp.data(), 24);
} }
inline void writeValLittle(const atVec3d& val) {writeVec3dLittle(val);} void writeValLittle(const atVec3d& val) {writeVec3dLittle(val);}
/** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec3d (24 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec3dBig(const atVec3d& vec) void writeVec3dBig(const atVec3d& vec)
{ {
atVec3d tmp = vec; simd_doubles tmp(vec.simd);
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
utility::BigDouble(tmp.vec[2]); tmp[2] = utility::BigDouble(tmp[2]);
writeUBytes((atUint8*)&tmp, 24); writeUBytes((atUint8*)tmp.data(), 24);
} }
inline void writeValBig(const atVec3d& val) {writeVec3dBig(val);} void writeValBig(const atVec3d& val) {writeVec3dBig(val);}
/** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4d(const atVec4d& vec) void writeVec4d(const atVec4d& vec)
{ {
atVec4d tmp = vec; simd_doubles tmp(vec.simd);
if (m_endian == Big) if (m_endian == Big)
{ {
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
utility::BigDouble(tmp.vec[2]); tmp[2] = utility::BigDouble(tmp[2]);
utility::BigDouble(tmp.vec[3]); tmp[3] = utility::BigDouble(tmp[3]);
} }
else else
{ {
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
utility::LittleDouble(tmp.vec[2]); tmp[2] = utility::LittleDouble(tmp[2]);
utility::LittleDouble(tmp.vec[3]); tmp[3] = utility::LittleDouble(tmp[3]);
} }
writeUBytes((atUint8*)&tmp, 32); writeUBytes((atUint8*)tmp.data(), 32);
} }
inline void writeVal(const atVec4d& val) {writeVec4d(val);} void writeVal(const atVec4d& val) {writeVec4d(val);}
/** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against little depending on the platform. * It also swaps the bytes against little depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4dLittle(const atVec4d& vec) void writeVec4dLittle(const atVec4d& vec)
{ {
atVec4d tmp = vec; simd_doubles tmp(vec.simd);
utility::LittleDouble(tmp.vec[0]); tmp[0] = utility::LittleDouble(tmp[0]);
utility::LittleDouble(tmp.vec[1]); tmp[1] = utility::LittleDouble(tmp[1]);
utility::LittleDouble(tmp.vec[2]); tmp[2] = utility::LittleDouble(tmp[2]);
utility::LittleDouble(tmp.vec[3]); tmp[3] = utility::LittleDouble(tmp[3]);
writeUBytes((atUint8*)&tmp, 32); writeUBytes((atUint8*)tmp.data(), 32);
} }
inline void writeValLittle(const atVec4d& val) {writeVec4dLittle(val);} void writeValLittle(const atVec4d& val) {writeVec4dLittle(val);}
/** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer. /** @brief Writes an atVec4d (32 bytes) to the buffer and advances the buffer.
* It also swaps the bytes against big depending on the platform. * It also swaps the bytes against big depending on the platform.
* *
* @param vec The value to write to the buffer * @param vec The value to write to the buffer
*/ */
inline void writeVec4dBig(const atVec4d& vec) void writeVec4dBig(const atVec4d& vec)
{ {
atVec4d tmp = vec; simd_doubles tmp(vec.simd);
utility::BigDouble(tmp.vec[0]); tmp[0] = utility::BigDouble(tmp[0]);
utility::BigDouble(tmp.vec[1]); tmp[1] = utility::BigDouble(tmp[1]);
utility::BigDouble(tmp.vec[2]); tmp[2] = utility::BigDouble(tmp[2]);
utility::BigDouble(tmp.vec[3]); tmp[3] = utility::BigDouble(tmp[3]);
writeUBytes((atUint8*)&tmp, 32); writeUBytes((atUint8*)tmp.data(), 32);
} }
inline void writeValBig(const atVec4d& val) {writeVec4dBig(val);} void writeValBig(const atVec4d& val) {writeVec4dBig(val);}
/** @brief Converts a UTF8 string to a wide-char string in the buffer and advances the buffer. /** @brief Converts a UTF8 string to a wide-char string in the buffer and advances the buffer.
* It also swaps the bytes depending on the platform and Stream settings. * It also swaps the bytes depending on the platform and Stream settings.
@ -693,7 +694,7 @@ public:
* *
* Endianness is set with setEndian * Endianness is set with setEndian
*/ */
inline void writeStringAsWString(std::string_view str, atInt32 fixedLen = -1) void writeStringAsWString(std::string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -751,7 +752,7 @@ public:
* *
* Endianness is little * Endianness is little
*/ */
inline void writeStringAsWStringLittle(std::string_view str, atInt32 fixedLen = -1) void writeStringAsWStringLittle(std::string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -809,7 +810,7 @@ public:
* *
* Endianness is big * Endianness is big
*/ */
inline void writeStringAsWStringBig(std::string_view str, atInt32 fixedLen = -1) void writeStringAsWStringBig(std::string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -865,7 +866,7 @@ public:
* @param str The string to write to the buffer * @param str The string to write to the buffer
* @param fixedLen If not -1, the number of characters to zero-fill string to * @param fixedLen If not -1, the number of characters to zero-fill string to
*/ */
inline void writeString(std::string_view str, atInt32 fixedLen = -1) void writeString(std::string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -895,7 +896,7 @@ public:
} }
} }
} }
inline void writeVal(std::string_view val) {writeString(val);} void writeVal(std::string_view val) {writeString(val);}
/** @brief Writes a wstring to the buffer and advances the buffer. /** @brief Writes a wstring to the buffer and advances the buffer.
* *
@ -904,7 +905,7 @@ public:
* *
* Endianness is set with setEndian * Endianness is set with setEndian
*/ */
inline void writeWString(std::wstring_view str, atInt32 fixedLen = -1) void writeWString(std::wstring_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -934,7 +935,7 @@ public:
} }
} }
} }
inline void writeVal(std::wstring_view val) {writeWString(val);} void writeVal(std::wstring_view val) {writeWString(val);}
/** @brief Writes a wstring to the buffer and advances the buffer. /** @brief Writes a wstring to the buffer and advances the buffer.
* *
@ -943,7 +944,7 @@ public:
* *
* Endianness is little * Endianness is little
*/ */
inline void writeWStringLittle(std::wstring_view str, atInt32 fixedLen = -1) void writeWStringLittle(std::wstring_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -973,7 +974,7 @@ public:
} }
} }
} }
inline void writeValLittle(std::wstring_view val) {writeWStringLittle(val);} void writeValLittle(std::wstring_view val) {writeWStringLittle(val);}
/** @brief Writes a wstring to the buffer and advances the buffer. /** @brief Writes a wstring to the buffer and advances the buffer.
* *
@ -982,7 +983,7 @@ public:
* *
* Endianness is big * Endianness is big
*/ */
inline void writeWStringBig(std::wstring_view str, atInt32 fixedLen = -1) void writeWStringBig(std::wstring_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -1012,7 +1013,7 @@ public:
} }
} }
} }
inline void writeValBig(std::wstring_view val) {writeWStringBig(val);} void writeValBig(std::wstring_view val) {writeWStringBig(val);}
/** @brief Writes a u16string to the buffer and advances the buffer. /** @brief Writes a u16string to the buffer and advances the buffer.
* *
@ -1021,7 +1022,7 @@ public:
* *
* Endianness is big * Endianness is big
*/ */
inline void writeU16StringBig(std::u16string_view str, atInt32 fixedLen = -1) void writeU16StringBig(std::u16string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -1051,7 +1052,7 @@ public:
} }
} }
} }
inline void writeValBig(std::u16string_view val) {writeU16StringBig(val);} void writeValBig(std::u16string_view val) {writeU16StringBig(val);}
/** @brief Writes a u32string to the buffer and advances the buffer. /** @brief Writes a u32string to the buffer and advances the buffer.
* *
@ -1060,7 +1061,7 @@ public:
* *
* Endianness is big * Endianness is big
*/ */
inline void writeU32StringBig(std::u32string_view str, atInt32 fixedLen = -1) void writeU32StringBig(std::u32string_view str, atInt32 fixedLen = -1)
{ {
if (fixedLen == 0) if (fixedLen == 0)
return; return;
@ -1090,9 +1091,9 @@ public:
} }
} }
} }
inline void writeValBig(std::u32string_view val) {writeU32StringBig(val);} void writeValBig(std::u32string_view val) {writeU32StringBig(val);}
inline void fill(atUint8 val, atUint64 length) void fill(atUint8 val, atUint64 length)
{ {
if (length == 0) if (length == 0)
return; return;
@ -1102,7 +1103,7 @@ public:
writeUBytes(tmp.get(), length); writeUBytes(tmp.get(), length);
} }
inline void fill(atInt8 val, atUint64 length) void fill(atInt8 val, atUint64 length)
{fill((atUint8)val, length);} {fill((atUint8)val, length);}
/** @brief Performs automatic std::vector enumeration writes using numeric type T /** @brief Performs automatic std::vector enumeration writes using numeric type T

View File

@ -12,106 +12,13 @@ using atInt64 = int64_t;
using atUint64 = uint64_t; using atUint64 = uint64_t;
// Vector types // Vector types
#if __SSE__ #include "simd/simd.hpp"
#include <immintrin.h> typedef struct { athena::simd<float> simd; } atVec2f;
#ifndef _WIN32 typedef struct { athena::simd<float> simd; } atVec3f;
#include <mm_malloc.h> typedef struct { athena::simd<float> simd; } atVec4f;
#endif typedef struct { athena::simd<double> simd; } atVec2d;
#endif typedef struct { athena::simd<double> simd; } atVec3d;
typedef struct { athena::simd<double> simd; } atVec4d;
#include <new>
#define AT_ALIGNED_ALLOCATOR \
void* operator new(size_t bytes) noexcept \
{return _mm_malloc(bytes, 16);} \
void* operator new[](size_t bytes) noexcept \
{return _mm_malloc(bytes, 16);} \
void operator delete(void* buf) noexcept \
{_mm_free(buf);} \
void operator delete[](void* buf) noexcept \
{_mm_free(buf);}
#define AT_ALIGNED_ALLOCATOR32 \
void* operator new(size_t bytes) noexcept \
{return _mm_malloc(bytes, 32);} \
void* operator new[](size_t bytes) noexcept \
{return _mm_malloc(bytes, 32);} \
void operator delete(void* buf) noexcept \
{_mm_free(buf);} \
void operator delete[](void* buf) noexcept \
{_mm_free(buf);}
typedef union alignas(16)
{
#if __clang__
float clangVec __attribute__((__vector_size__(8)));
#endif
#if __SSE__
__m128 mVec128;
AT_ALIGNED_ALLOCATOR
#endif
float vec[2];
} atVec2f;
typedef union alignas(16)
{
#if __clang__
float clangVec __attribute__((__vector_size__(12)));
#endif
#if __SSE__
__m128 mVec128;
AT_ALIGNED_ALLOCATOR
#endif
float vec[3];
} atVec3f;
typedef union alignas(16)
{
#if __clang__
float clangVec __attribute__((__vector_size__(16)));
#endif
#if __SSE__
__m128 mVec128;
AT_ALIGNED_ALLOCATOR
#endif
float vec[4];
} atVec4f;
typedef union alignas(16)
{
#if __SSE__
__m128d mVec128;
AT_ALIGNED_ALLOCATOR
#endif
double vec[2];
} atVec2d;
typedef union alignas(32)
{
#if __AVX__
__m256d mVec256;
AT_ALIGNED_ALLOCATOR32
#elif __SSE__
AT_ALIGNED_ALLOCATOR
#endif
#if __SSE__
__m128d mVec128[2];
#endif
double vec[3];
} atVec3d;
typedef union alignas(32)
{
#if __AVX__
__m256d mVec256;
AT_ALIGNED_ALLOCATOR32
#elif __SSE__
AT_ALIGNED_ALLOCATOR
#endif
#if __SSE__
__m128d mVec128[2];
#endif
double vec[4];
} atVec4d;
#ifndef UNUSED #ifndef UNUSED
#define UNUSED(x) ((void)x) #define UNUSED(x) ((void)x)

View File

@ -163,28 +163,28 @@ inline atUint64 BigUint64(atUint64& val)
return val; return val;
} }
inline float LittleFloat(float& val) inline float LittleFloat(float val)
{ {
if (athena::utility::isSystemBigEndian()) if (athena::utility::isSystemBigEndian())
val = athena::utility::swapFloat(val); val = athena::utility::swapFloat(val);
return val; return val;
} }
inline float BigFloat(float& val) inline float BigFloat(float val)
{ {
if (!athena::utility::isSystemBigEndian()) if (!athena::utility::isSystemBigEndian())
val = athena::utility::swapFloat(val); val = athena::utility::swapFloat(val);
return val; return val;
} }
inline double LittleDouble(double& val) inline double LittleDouble(double val)
{ {
if (athena::utility::isSystemBigEndian()) if (athena::utility::isSystemBigEndian())
val = athena::utility::swapDouble(val); val = athena::utility::swapDouble(val);
return val; return val;
} }
inline double BigDouble(double& val) inline double BigDouble(double val)
{ {
if (!athena::utility::isSystemBigEndian()) if (!athena::utility::isSystemBigEndian())
val = athena::utility::swapDouble(val); val = athena::utility::swapDouble(val);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
#pragma once
#define _ATHENA_SIMD_INCLUDED
namespace athena::_simd { using namespace std; }
#include "parallelism_v2_simd.hpp"
#if _M_IX86_FP >= 1 || _M_X64
#define __SSE__ 1
#endif
#if __AVX__
#include "simd_avx.hpp"
#elif __SSE__
#include "simd_sse.hpp"
#else
// Fallback taken when neither the AVX nor the SSE header was included: both
// float and double select the generic four-lane fixed_size ABI.
// NOTE(review): this namespace is opened at global scope, whereas simd_avx.hpp
// specializes athena_native inside athena::_simd::simd_abi and the aliases
// further down look it up as _simd::simd_abi::athena_native. fixed_size is also
// used unqualified here. Confirm this branch actually compiles on a target
// without SSE; it may need to be wrapped in namespace athena::_simd.
namespace simd_abi {
template<typename T> struct athena_native {};
template<> struct athena_native<float> { using type = fixed_size<4>; };
template<> struct athena_native<double> { using type = fixed_size<4>; };
}
#endif
namespace athena {
// SIMD vector of T whose ABI is whatever athena_native<T> selected above
// (AVX, SSE, or the fixed_size fallback).
template<typename T> using simd = _simd::simd<T,
typename _simd::simd_abi::athena_native<T>::type>;
// simd_data buffer matching simd<T>; the storage backends load from and store
// to it through its data() pointer.
template<typename T>
using simd_values = _simd::simd_data<simd<T>>;
// Convenience names for the float and double buffers.
using simd_floats = simd_values<float>;
using simd_doubles = simd_values<double>;
}

View File

@ -0,0 +1,188 @@
#pragma once
#ifndef _ATHENA_SIMD_INCLUDED
#error simd_avx.hpp must not be included directly. Include simd.hpp instead.
#endif
#include "simd_sse.hpp"
#include <immintrin.h>
namespace athena::_simd {
// AVX backend: four doubles held in a single __m256d register.
template<>
class __simd_storage<double, m256d_abi> {
public:
  using storage_type = __m256d;
  storage_type __storage_;

  // Returns lane __index by spilling the register to an aligned scratch array.
  double __get(size_t __index) const noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Replaces lane __index: spill, patch the one element, reload.
  void __set(size_t __index, double __val) noexcept {
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm256_load_pd(lanes.data());
  }

  // Sets lanes 0..3 to a, b, c, d (_mm256_set_pd takes the highest lane first).
  void __set4(double a, double b, double c, double d) noexcept {
    __storage_ = _mm256_set_pd(d, c, b, a);
  }

  // Copies __val into every lane.
  void __broadcast(double __val) noexcept {
    __storage_ = _mm256_set1_pd(__val);
  }

  // Dot product over lanes 0 and 1.
  double __dot2(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const __m256d products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1];
  }

  // Dot product over lanes 0, 1 and 2.
  double __dot3(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const __m256d products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2];
  }

  // Dot product over all four lanes.
  double __dot4(const __simd_storage<double, m256d_abi>& other) const noexcept {
    const __m256d products = _mm256_mul_pd(__storage_, other.__storage_);
    alignas(32) std::array<double, 4> lanes;
    _mm256_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
  }

  // Loads all lanes from __buffer (aligned load).
  void __copy_from(const simd_data<simd<double, m256d_abi>>& __buffer) noexcept {
    __storage_ = _mm256_load_pd(__buffer.data());
  }

  // Stores all lanes into __buffer (aligned store).
  void __copy_to(simd_data<simd<double, m256d_abi>>& __buffer) const noexcept {
    _mm256_store_pd(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Widens the four floats of an __m128 storage to doubles.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other)
  : __storage_(_mm256_cvtps_pd(other.__storage_)) {}

  // Wraps an existing native register value.
  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  // Read-only access to the underlying register.
  const storage_type& __native() const { return __storage_; }
};
// Mask counterpart of the AVX double storage; each lane is examined as a
// 64-bit integer pattern.
template<>
class __simd_mask_storage<double, m256d_abi> : public __simd_storage<double, m256d_abi> {
public:
  // A lane reads as true when any of its bits is set.
  bool __get(size_t __index) const noexcept {
    alignas(32) uint64_t lane_bits[4];
    _mm256_store_pd(reinterpret_cast<double*>(lane_bits), __storage_);
    return lane_bits[__index] != 0;
  }

  // Writes all-ones (true) or all-zeros (false) into the selected lane.
  void __set(size_t __index, bool __val) noexcept {
    alignas(32) uint64_t lane_bits[4];
    _mm256_store_pd(reinterpret_cast<double*>(lane_bits), __storage_);
    if (__val)
      lane_bits[__index] = UINT64_MAX;
    else
      lane_bits[__index] = 0;
    __storage_ = _mm256_load_pd(reinterpret_cast<double*>(lane_bits));
  }
};
// Unary minus: XOR with -0.0 toggles only the sign bit of every lane.
template <>
inline simd<double, m256d_abi> simd<double, m256d_abi>::operator-() const {
  const __m256d sign_bits = _mm256_set1_pd(-0.0);
  return _mm256_xor_pd(__s_.__storage_, sign_bits);
}
// Lane-wise a + b.
inline simd<double, m256d_abi>
operator+(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> sum;
  sum.__s_.__storage_ = _mm256_add_pd(a.__s_.__storage_, b.__s_.__storage_);
  return sum;
}

// Lane-wise a - b.
inline simd<double, m256d_abi>
operator-(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> difference;
  difference.__s_.__storage_ = _mm256_sub_pd(a.__s_.__storage_, b.__s_.__storage_);
  return difference;
}

// Lane-wise a * b.
inline simd<double, m256d_abi>
operator*(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> product;
  product.__s_.__storage_ = _mm256_mul_pd(a.__s_.__storage_, b.__s_.__storage_);
  return product;
}

// Lane-wise a / b.
inline simd<double, m256d_abi>
operator/(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi> quotient;
  quotient.__s_.__storage_ = _mm256_div_pd(a.__s_.__storage_, b.__s_.__storage_);
  return quotient;
}
// In-place lane-wise addition; returns a.
inline simd<double, m256d_abi>&
operator+=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  __m256d& target = a.__s_.__storage_;
  target = _mm256_add_pd(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise subtraction; returns a.
inline simd<double, m256d_abi>&
operator-=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  __m256d& target = a.__s_.__storage_;
  target = _mm256_sub_pd(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise multiplication; returns a.
inline simd<double, m256d_abi>&
operator*=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  __m256d& target = a.__s_.__storage_;
  target = _mm256_mul_pd(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise division; returns a.
inline simd<double, m256d_abi>&
operator/=(simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  __m256d& target = a.__s_.__storage_;
  target = _mm256_div_pd(target, b.__s_.__storage_);
  return a;
}
// Lane-wise equality; _CMP_EQ_OQ is the ordered, quiet equality predicate.
inline simd<double, m256d_abi>::mask_type
operator==(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_EQ_OQ);
  return mask;
}
// Lane-wise inequality of two AVX double vectors.
//
// Uses the unordered predicate _CMP_NEQ_UQ so that a lane compares "not equal"
// when either operand is NaN. This matches scalar `!=` and the SSE float path
// (_mm_cmpneq_ps). The ordered predicate _CMP_NEQ_OQ would report false for
// NaN lanes, making both == and != false at once.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask whose lanes are all-ones where a and b differ (or are unordered)
inline simd<double, m256d_abi>::mask_type
operator!=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type ret;
  ret.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_NEQ_UQ);
  return ret;
}
// Lane-wise a >= b (ordered, quiet predicate).
inline simd<double, m256d_abi>::mask_type
operator>=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GE_OQ);
  return mask;
}

// Lane-wise a <= b (ordered, quiet predicate).
inline simd<double, m256d_abi>::mask_type
operator<=(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LE_OQ);
  return mask;
}

// Lane-wise a > b (ordered, quiet predicate).
inline simd<double, m256d_abi>::mask_type
operator>(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_GT_OQ);
  return mask;
}

// Lane-wise a < b (ordered, quiet predicate).
inline simd<double, m256d_abi>::mask_type
operator<(const simd<double, m256d_abi>& a, const simd<double, m256d_abi>& b) {
  simd<double, m256d_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm256_cmp_pd(a.__s_.__storage_, b.__s_.__storage_, _CMP_LT_OQ);
  return mask;
}
// Out-of-class definition of the narrowing constructor declared in simd_sse.hpp:
// converts the four doubles of an AVX storage to four floats.
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m256d_abi>& other)
: __storage_(_mm256_cvtpd_ps(other.__storage_)) {}
// With AVX available, double vectors use the __m256d-backed ABI as the native
// type picked up by athena::simd<double>.
namespace simd_abi {
template<> struct athena_native<double> { using type = m256d_abi; };
} // namespace simd_abi
} // namespace athena::_simd

View File

@ -0,0 +1,455 @@
#pragma once
#ifndef _ATHENA_SIMD_INCLUDED
#error simd_sse.hpp must not be included directly. Include simd.hpp instead.
#endif
#include <xmmintrin.h>
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace athena::_simd {
// ABI tags for the intrinsic-backed storages. Each one is a four-lane
// __simd_abi whose storage kind is numbered just past _StorageKind::_VecExt.

// __m128 ABI (float)
using m128_abi =
    __simd_abi<static_cast<_StorageKind>(static_cast<int>(_StorageKind::_VecExt) + 1), 4>;
// __m128d ABI (double)
using m128d_abi =
    __simd_abi<static_cast<_StorageKind>(static_cast<int>(_StorageKind::_VecExt) + 2), 4>;
#ifdef __AVX__
// __m256d ABI (double, AVX builds only)
using m256d_abi =
    __simd_abi<static_cast<_StorageKind>(static_cast<int>(_StorageKind::_VecExt) + 3), 4>;
#endif

// Forward declarations so the float storage can declare converting
// constructors from the double storages before they are defined.
template <>
class __simd_storage<double, m128d_abi>;
#ifdef __AVX__
template <>
class __simd_storage<double, m256d_abi>;
#endif
// SSE backend: four floats held in a single __m128 register.
template <>
class __simd_storage<float, m128_abi> {
public:
  using storage_type = __m128;
  storage_type __storage_;

  // Returns lane __index by spilling the register to an aligned scratch array.
  float __get(size_t __index) const noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    return lanes[__index];
  }

  // Replaces lane __index: spill, patch the one element, reload.
  void __set(size_t __index, float __val) noexcept {
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), __storage_);
    lanes[__index] = __val;
    __storage_ = _mm_load_ps(lanes.data());
  }

  // Sets lanes 0..3 to a, b, c, d (_mm_set_ps takes the highest lane first).
  void __set4(float a, float b, float c, float d) noexcept {
    __storage_ = _mm_set_ps(d, c, b, a);
  }

  // Copies __val into every lane.
  void __broadcast(float __val) noexcept {
    __storage_ = _mm_set1_ps(__val);
  }

  // Dot product over lanes 0 and 1.
  float __dot2(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    // SSE4.1 path: the dot-product instruction does the multiply and the sum.
    float result;
    _mm_store_ss(&result, _mm_dp_ps(__storage_, other.__storage_, 0x3F));
    return result;
#else
    const __m128 products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1];
#endif
  }

  // Dot product over lanes 0, 1 and 2.
  float __dot3(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    float result;
    _mm_store_ss(&result, _mm_dp_ps(__storage_, other.__storage_, 0x7F));
    return result;
#else
    const __m128 products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2];
#endif
  }

  // Dot product over all four lanes.
  float __dot4(const __simd_storage<float, m128_abi>& other) const noexcept {
#if __SSE4_1__
    float result;
    _mm_store_ss(&result, _mm_dp_ps(__storage_, other.__storage_, 0xFF));
    return result;
#else
    const __m128 products = _mm_mul_ps(__storage_, other.__storage_);
    alignas(16) std::array<float, 4> lanes;
    _mm_store_ps(lanes.data(), products);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
#endif
  }

  // Returns a copy whose lanes 0..3 are taken from source lanes x, y, z, w.
  template<int x, int y, int z, int w>
  __simd_storage __shuffle() const noexcept {
    return __simd_storage(_mm_shuffle_ps(__storage_, __storage_, _MM_SHUFFLE(w, z, y, x)));
  }

  // Loads all lanes from __buffer (aligned load).
  void __copy_from(const simd_data<simd<float, m128_abi>>& __buffer) noexcept {
    __storage_ = _mm_load_ps(__buffer.data());
  }

  // Stores all lanes into __buffer (aligned store).
  void __copy_to(simd_data<simd<float, m128_abi>>& __buffer) const noexcept {
    _mm_store_ps(__buffer.data(), __storage_);
  }

  __simd_storage() = default;

  // Narrowing conversions from the double storages; defined out of class once
  // those types are complete.
  explicit __simd_storage(const __simd_storage<double, m128d_abi>& other);
#ifdef __AVX__
  explicit __simd_storage(const __simd_storage<double, m256d_abi>& other);
#endif

  // Wraps an existing native register value.
  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  // Read-only access to the underlying register.
  const storage_type& __native() const { return __storage_; }
};
// Mask counterpart of the SSE float storage; each lane is examined as a
// 32-bit integer pattern.
template <>
class __simd_mask_storage<float, m128_abi> : public __simd_storage<float, m128_abi>
{
public:
  // A lane reads as true when any of its bits is set.
  bool __get(size_t __index) const noexcept {
    alignas(16) uint32_t lane_bits[4];
    _mm_store_ps(reinterpret_cast<float*>(lane_bits), __storage_);
    return lane_bits[__index] != 0;
  }

  // Writes all-ones (true) or all-zeros (false) into the selected lane.
  void __set(size_t __index, bool __val) noexcept {
    alignas(16) uint32_t lane_bits[4];
    _mm_store_ps(reinterpret_cast<float*>(lane_bits), __storage_);
    if (__val)
      lane_bits[__index] = UINT32_MAX;
    else
      lane_bits[__index] = 0;
    __storage_ = _mm_load_ps(reinterpret_cast<float*>(lane_bits));
  }
};
// Unary minus: XOR with -0.f toggles only the sign bit of every lane.
template <>
inline simd<float, m128_abi> simd<float, m128_abi>::operator-() const {
  const __m128 sign_bits = _mm_set1_ps(-0.f);
  return _mm_xor_ps(__s_.__storage_, sign_bits);
}
// Lane-wise a + b.
inline simd<float, m128_abi>
operator+(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> sum;
  sum.__s_.__storage_ = _mm_add_ps(a.__s_.__storage_, b.__s_.__storage_);
  return sum;
}

// Lane-wise a - b.
inline simd<float, m128_abi>
operator-(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> difference;
  difference.__s_.__storage_ = _mm_sub_ps(a.__s_.__storage_, b.__s_.__storage_);
  return difference;
}

// Lane-wise a * b.
inline simd<float, m128_abi>
operator*(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> product;
  product.__s_.__storage_ = _mm_mul_ps(a.__s_.__storage_, b.__s_.__storage_);
  return product;
}

// Lane-wise a / b.
inline simd<float, m128_abi>
operator/(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi> quotient;
  quotient.__s_.__storage_ = _mm_div_ps(a.__s_.__storage_, b.__s_.__storage_);
  return quotient;
}
// In-place lane-wise addition; returns a.
inline simd<float, m128_abi>&
operator+=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  __m128& target = a.__s_.__storage_;
  target = _mm_add_ps(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise subtraction; returns a.
inline simd<float, m128_abi>&
operator-=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  __m128& target = a.__s_.__storage_;
  target = _mm_sub_ps(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise multiplication; returns a.
inline simd<float, m128_abi>&
operator*=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  __m128& target = a.__s_.__storage_;
  target = _mm_mul_ps(target, b.__s_.__storage_);
  return a;
}

// In-place lane-wise division; returns a.
inline simd<float, m128_abi>&
operator/=(simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  __m128& target = a.__s_.__storage_;
  target = _mm_div_ps(target, b.__s_.__storage_);
  return a;
}
// Lane-wise a == b.
inline simd<float, m128_abi>::mask_type
operator==(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpeq_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

// Lane-wise a != b.
inline simd<float, m128_abi>::mask_type
operator!=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpneq_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

// Lane-wise a >= b.
inline simd<float, m128_abi>::mask_type
operator>=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpge_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

// Lane-wise a <= b.
inline simd<float, m128_abi>::mask_type
operator<=(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmple_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

// Lane-wise a > b.
inline simd<float, m128_abi>::mask_type
operator>(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmpgt_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}

// Lane-wise a < b.
inline simd<float, m128_abi>::mask_type
operator<(const simd<float, m128_abi>& a, const simd<float, m128_abi>& b) {
  simd<float, m128_abi>::mask_type mask;
  mask.__s_.__storage_ = _mm_cmplt_ps(a.__s_.__storage_, b.__s_.__storage_);
  return mask;
}
// __m128d storage for SSE2+
template <>
class __simd_storage<double, m128d_abi> {
public:
  // Two __m128d registers of two doubles each: element i is lane (i % 2) of half (i / 2).
  using storage_type = std::array<__m128d, 2>;
  storage_type __storage_;

  // Reads one element by spilling the half that contains it to an aligned scratch array.
  double __get(size_t __index) const noexcept {
    const size_t half = __index / 2;
    const size_t lane = __index % 2;
    alignas(16) std::array<double, 2> scratch;
    _mm_store_pd(scratch.data(), __storage_[half]);
    return scratch[lane];
  }

  // Writes one element: spill the containing half, patch the lane, reload the half.
  void __set(size_t __index, double __val) noexcept {
    const size_t half = __index / 2;
    const size_t lane = __index % 2;
    alignas(16) std::array<double, 2> scratch;
    _mm_store_pd(scratch.data(), __storage_[half]);
    scratch[lane] = __val;
    __storage_[half] = _mm_load_pd(scratch.data());
  }

  // Sets all four elements; _mm_set_pd takes the high lane first, hence the swapped arguments.
  void __set4(double a, double b, double c, double d) noexcept {
    __storage_[0] = _mm_set_pd(b, a);
    __storage_[1] = _mm_set_pd(d, c);
  }

  // Fills every lane of both halves with __val.
  void __broadcast(double __val) noexcept {
    __storage_[0] = _mm_set1_pd(__val);
    __storage_[1] = _mm_set1_pd(__val);
  }

  // Dot product over elements 0..1 (first half only).
  double __dot2(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double sum;
    _mm_store_sd(&sum, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    return sum;
#else
    const __m128d products = _mm_mul_pd(__storage_[0], other.__storage_[0]);
    alignas(16) std::array<double, 2> lanes;
    _mm_store_pd(lanes.data(), products);
    return lanes[0] + lanes[1];
#endif
  }

  // Dot product over elements 0..2: the whole first half plus lane 0 of the second half.
  double __dot3(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double lower_sum;
    _mm_store_sd(&lower_sum, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    alignas(16) std::array<double, 2> upper;
    _mm_store_pd(upper.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return lower_sum + upper[0];
#else
    alignas(16) std::array<double, 2> lower;
    _mm_store_pd(lower.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    alignas(16) std::array<double, 2> upper;
    _mm_store_pd(upper.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return lower[0] + lower[1] + upper[0];
#endif
  }

  // Dot product over all four elements (both halves).
  double __dot4(const __simd_storage<double, m128d_abi>& other) const noexcept {
#if __SSE4_1__
    double lower_sum;
    _mm_store_sd(&lower_sum, _mm_dp_pd(__storage_[0], other.__storage_[0], 0x3F));
    double upper_sum;
    _mm_store_sd(&upper_sum, _mm_dp_pd(__storage_[1], other.__storage_[1], 0x3F));
    return lower_sum + upper_sum;
#else
    alignas(16) std::array<double, 2> lower;
    _mm_store_pd(lower.data(), _mm_mul_pd(__storage_[0], other.__storage_[0]));
    alignas(16) std::array<double, 2> upper;
    _mm_store_pd(upper.data(), _mm_mul_pd(__storage_[1], other.__storage_[1]));
    return lower[0] + lower[1] + upper[0] + upper[1];
#endif
  }

  // Loads four doubles from __buffer using aligned loads (two per half).
  // NOTE(review): _mm_load_pd needs 16-byte alignment; assumes simd_data guarantees it.
  void __copy_from(const simd_data<simd<double, m128d_abi>>& __buffer) noexcept {
    const double* const src = __buffer.data();
    __storage_[0] = _mm_load_pd(src);
    __storage_[1] = _mm_load_pd(src + 2);
  }

  // Stores the four doubles into __buffer using aligned stores (two per half).
  void __copy_to(simd_data<simd<double, m128d_abi>>& __buffer) const noexcept {
    double* const dst = __buffer.data();
    _mm_store_pd(dst, __storage_[0]);
    _mm_store_pd(dst + 2, __storage_[1]);
  }

  __simd_storage() = default;

  // Widens four packed floats into four doubles.
  explicit __simd_storage(const __simd_storage<float, m128_abi>& other) {
    // movehl brings the upper two floats down to the low lanes so cvtps_pd can widen them.
    const __m128 upper_pair = _mm_movehl_ps(other.__storage_, other.__storage_);
    __storage_[0] = _mm_cvtps_pd(other.__storage_);
    __storage_[1] = _mm_cvtps_pd(upper_pair);
  }

  // Wraps an existing pair of native registers.
  explicit __simd_storage(const storage_type& s) : __storage_(s) {}

  // Read-only access to the underlying native registers.
  const storage_type& __native() const { return __storage_; }
};
// __m128d mask storage for SSE2+
template <>
class __simd_mask_storage<double, m128d_abi> : public __simd_storage<double, m128d_abi>
{
public:
  // Returns whether mask element __index is set, i.e. its 64-bit lane is non-zero.
  bool __get(size_t __index) const noexcept {
    // Spill only the half (__index / 2) that holds the requested lane.
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    // The scratch array has just two lanes, so index it with the lane inside
    // the half. Using __index directly would read out of bounds for 2 and 3.
    return sse_data[__index % 2] != 0;
  }

  // Sets mask element __index to all-ones (true) or all-zeros (false).
  void __set(size_t __index, bool __val) noexcept {
    // Spill the containing half, patch one lane, then reload the half.
    alignas(16) uint64_t sse_data[2];
    _mm_store_pd(reinterpret_cast<double*>(sse_data), __storage_[__index / 2]);
    sse_data[__index % 2] = __val ? UINT64_MAX : 0;
    __storage_[__index / 2] = _mm_load_pd(reinterpret_cast<double*>(sse_data));
  }
};
template <>
inline simd<double, m128d_abi> simd<double, m128d_abi>::operator-() const {
  // Negate by XOR-ing every lane of both halves with -0.0.
  const __m128d sign_mask = _mm_set1_pd(-0.0);
  simd<double, m128d_abi> negated;
  negated.__s_.__storage_[0] = _mm_xor_pd(__s_.__storage_[0], sign_mask);
  negated.__s_.__storage_[1] = _mm_xor_pd(__s_.__storage_[1], sign_mask);
  return negated;
}
inline simd<double, m128d_abi>
operator+(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Packed double-precision a + b, applied to each of the two __m128d halves.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi> sum;
  sum.__s_.__storage_[0] = _mm_add_pd(lhs[0], rhs[0]);
  sum.__s_.__storage_[1] = _mm_add_pd(lhs[1], rhs[1]);
  return sum;
}
inline simd<double, m128d_abi>
operator-(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Packed double-precision a - b, applied to each of the two __m128d halves.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi> difference;
  difference.__s_.__storage_[0] = _mm_sub_pd(lhs[0], rhs[0]);
  difference.__s_.__storage_[1] = _mm_sub_pd(lhs[1], rhs[1]);
  return difference;
}
inline simd<double, m128d_abi>
operator*(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Packed double-precision a * b, applied to each of the two __m128d halves.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi> product;
  product.__s_.__storage_[0] = _mm_mul_pd(lhs[0], rhs[0]);
  product.__s_.__storage_[1] = _mm_mul_pd(lhs[1], rhs[1]);
  return product;
}
inline simd<double, m128d_abi>
operator/(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Packed double-precision a / b, applied to each of the two __m128d halves.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi> quotient;
  quotient.__s_.__storage_[0] = _mm_div_pd(lhs[0], rhs[0]);
  quotient.__s_.__storage_[1] = _mm_div_pd(lhs[1], rhs[1]);
  return quotient;
}
inline simd<double, m128d_abi>&
operator+=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // In-place packed addition: first half, then second half.
  a.__s_.__storage_[0] = _mm_add_pd(a.__s_.__storage_[0], b.__s_.__storage_[0]);
  a.__s_.__storage_[1] = _mm_add_pd(a.__s_.__storage_[1], b.__s_.__storage_[1]);
  return a;
}
inline simd<double, m128d_abi>&
operator-=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // In-place packed subtraction: first half, then second half.
  a.__s_.__storage_[0] = _mm_sub_pd(a.__s_.__storage_[0], b.__s_.__storage_[0]);
  a.__s_.__storage_[1] = _mm_sub_pd(a.__s_.__storage_[1], b.__s_.__storage_[1]);
  return a;
}
inline simd<double, m128d_abi>&
operator*=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // In-place packed multiplication: first half, then second half.
  a.__s_.__storage_[0] = _mm_mul_pd(a.__s_.__storage_[0], b.__s_.__storage_[0]);
  a.__s_.__storage_[1] = _mm_mul_pd(a.__s_.__storage_[1], b.__s_.__storage_[1]);
  return a;
}
inline simd<double, m128d_abi>&
operator/=(simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // In-place packed division: first half, then second half.
  a.__s_.__storage_[0] = _mm_div_pd(a.__s_.__storage_[0], b.__s_.__storage_[0]);
  a.__s_.__storage_[1] = _mm_div_pd(a.__s_.__storage_[1], b.__s_.__storage_[1]);
  return a;
}
inline simd<double, m128d_abi>::mask_type
operator==(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a == b" on both halves; raw _mm_cmpeq_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmpeq_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmpeq_pd(lhs[1], rhs[1]);
  return mask;
}
inline simd<double, m128d_abi>::mask_type
operator!=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a != b" on both halves; raw _mm_cmpneq_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmpneq_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmpneq_pd(lhs[1], rhs[1]);
  return mask;
}
inline simd<double, m128d_abi>::mask_type
operator>=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a >= b" on both halves; raw _mm_cmpge_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmpge_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmpge_pd(lhs[1], rhs[1]);
  return mask;
}
inline simd<double, m128d_abi>::mask_type
operator<=(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a <= b" on both halves; raw _mm_cmple_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmple_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmple_pd(lhs[1], rhs[1]);
  return mask;
}
inline simd<double, m128d_abi>::mask_type
operator>(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a > b" on both halves; raw _mm_cmpgt_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmpgt_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmpgt_pd(lhs[1], rhs[1]);
  return mask;
}
inline simd<double, m128d_abi>::mask_type
operator<(const simd<double, m128d_abi>& a, const simd<double, m128d_abi>& b) {
  // Per-lane "a < b" on both halves; raw _mm_cmplt_pd results form the mask.
  const auto& lhs = a.__s_.__storage_;
  const auto& rhs = b.__s_.__storage_;
  simd<double, m128d_abi>::mask_type mask;
  mask.__s_.__storage_[0] = _mm_cmplt_pd(lhs[0], rhs[0]);
  mask.__s_.__storage_[1] = _mm_cmplt_pd(lhs[1], rhs[1]);
  return mask;
}
inline __simd_storage<float, m128_abi>::__simd_storage(const __simd_storage<double, m128d_abi>& other) {
  // Narrow each half of two doubles to floats (result in the low two lanes),
  // then join the two low pairs into one four-float register.
  const __m128 first_pair = _mm_cvtpd_ps(other.__storage_[0]);
  const __m128 second_pair = _mm_cvtpd_ps(other.__storage_[1]);
  __storage_ = _mm_movelh_ps(first_pair, second_pair);
}
namespace simd_abi {
// Maps an element type to the ABI tag used for it; the primary template
// deliberately has no `type` member.
template <typename T>
struct athena_native {};

// float uses the single-register __m128 ABI.
template <>
struct athena_native<float> {
  using type = m128_abi;
};

// double maps to the two-register __m128d ABI only when AVX is not enabled.
// NOTE(review): presumably an AVX-specific header supplies the double mapping otherwise - confirm.
#ifndef __AVX__
template <>
struct athena_native<double> {
  using type = m128d_abi;
};
#endif
} // namespace simd_abi
} // namespace athena::_simd

View File

@ -184,8 +184,12 @@ std::unique_ptr<YAMLNode> ValToNode(double val)
template <typename RETURNTYPE> template <typename RETURNTYPE>
RETURNTYPE NodeToVec(const YAMLNode* node) RETURNTYPE NodeToVec(const YAMLNode* node)
{ {
constexpr bool isDouble = std::is_same<RETURNTYPE, atVec2d>::value ||
std::is_same<RETURNTYPE, atVec3d>::value ||
std::is_same<RETURNTYPE, atVec4d>::value;
RETURNTYPE retval = {}; RETURNTYPE retval = {};
auto it = node->m_seqChildren.begin(); auto it = node->m_seqChildren.begin();
simd_values<std::conditional_t<isDouble, double, float>> f;
for (size_t i=0; for (size_t i=0;
i<4 && it != node->m_seqChildren.end(); i<4 && it != node->m_seqChildren.end();
++i, ++it) ++i, ++it)
@ -193,16 +197,15 @@ RETURNTYPE NodeToVec(const YAMLNode* node)
YAMLNode* snode = it->get(); YAMLNode* snode = it->get();
if (snode->m_type == YAML_SCALAR_NODE) if (snode->m_type == YAML_SCALAR_NODE)
{ {
if (std::is_same<RETURNTYPE, atVec2d>::value || if (isDouble)
std::is_same<RETURNTYPE, atVec3d>::value || f[i] = NodeToVal<double>(snode);
std::is_same<RETURNTYPE, atVec4d>::value)
retval.vec[i] = NodeToVal<double>(snode);
else else
retval.vec[i] = NodeToVal<float>(snode); f[i] = NodeToVal<float>(snode);
} }
else else
retval.vec[i] = 0.0; f[i] = 0.0;
} }
retval.simd.copy_from(f);
return retval; return retval;
} }
@ -216,10 +219,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec2f& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(2); ret->m_seqChildren.reserve(2);
simd_floats f(val.simd);
for (size_t i=0 ; i<2 ; ++i) for (size_t i=0 ; i<2 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);
@ -237,10 +241,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec3f& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(3); ret->m_seqChildren.reserve(3);
simd_floats f(val.simd);
for (size_t i=0 ; i<3 ; ++i) for (size_t i=0 ; i<3 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);
@ -258,10 +263,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec4f& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(4); ret->m_seqChildren.reserve(4);
simd_floats f(val.simd);
for (size_t i=0 ; i<4 ; ++i) for (size_t i=0 ; i<4 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);
@ -279,10 +285,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec2d& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(2); ret->m_seqChildren.reserve(2);
simd_doubles f(val.simd);
for (size_t i=0 ; i<2 ; ++i) for (size_t i=0 ; i<2 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);
@ -300,10 +307,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec3d& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(3); ret->m_seqChildren.reserve(3);
simd_doubles f(val.simd);
for (size_t i=0 ; i<3 ; ++i) for (size_t i=0 ; i<3 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);
@ -321,10 +329,11 @@ std::unique_ptr<YAMLNode> ValToNode(const atVec4d& val)
{ {
YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE); YAMLNode* ret = new YAMLNode(YAML_SEQUENCE_NODE);
ret->m_seqChildren.reserve(4); ret->m_seqChildren.reserve(4);
simd_doubles f(val.simd);
for (size_t i=0 ; i<4 ; ++i) for (size_t i=0 ; i<4 ; ++i)
{ {
char str[64]; char str[64];
snprintf(str, 64, "%f", val.vec[i]); snprintf(str, 64, "%f", f[i]);
YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE); YAMLNode* comp = new YAMLNode(YAML_SCALAR_NODE);
comp->m_scalarString = str; comp->m_scalarString = str;
ret->m_seqChildren.emplace_back(comp); ret->m_seqChildren.emplace_back(comp);