diff --git a/Common/TString.cpp b/Common/TString.cpp new file mode 100644 index 00000000..4632920b --- /dev/null +++ b/Common/TString.cpp @@ -0,0 +1,105 @@ +#include "TString.h" +#include + +// ************ TString ************ +TString::TString(const wchar_t* pkText) +{ + *this = TWideString(pkText).ToUTF8(); +} + +TString::TString(const TWideString& rkText) +{ + *this = rkText.ToUTF8(); +} + +TWideString TString::ToUTF16() const +{ + TWideString out; + const char *pkCStr = CString(); + + while (pkCStr[0]) + { + // Step 1: decode UTF-8 code point + wchar_t CodePoint; + + // One byte + if ((pkCStr[0] & 0x80) == 0) + { + CodePoint = pkCStr[0] & 0x7FFFFFFF; + pkCStr++; + } + + // Two bytes + else if ((pkCStr[0] & 0xE0) == 0xC0) + { + CodePoint = (((pkCStr[0] & 0x1F) << 6) | + (pkCStr[1] & 0x3F)); + pkCStr += 2; + } + + // Three bytes + else if ((pkCStr[0] & 0xF0) == 0xE0) + { + CodePoint = (((pkCStr[0] & 0xF) << 12) | + ((pkCStr[1] & 0x3F) << 6) | + (pkCStr[2] & 0x3F)); + pkCStr += 3; + } + + // Four bytes + else if ((pkCStr[0] & 0xF8) == 0xF0) + { + CodePoint = (((pkCStr[0] & 0x7) << 18) | + ((pkCStr[1] & 0x3F) << 12) | + ((pkCStr[2] & 0x3F) << 6) | + (pkCStr[3] & 0x3F)); + pkCStr += 4; + } + + // Five bytes + else if ((pkCStr[0] & 0xFC) == 0xF8) + { + CodePoint = (((pkCStr[0] & 0x3) << 24) | + ((pkCStr[1] & 0x3F) << 18) | + ((pkCStr[2] & 0x3F) << 12) | + ((pkCStr[3] & 0x3F) << 6) | + (pkCStr[4] & 0x3F)); + pkCStr += 5; + } + + // Six bytes + else if ((pkCStr[0] & 0xFE) == 0xFC) + { + CodePoint = (((pkCStr[0] & 0x1) << 30) | + ((pkCStr[1] & 0x3F) << 24) | + ((pkCStr[2] & 0x3F) << 18) | + ((pkCStr[3] & 0x3F) << 12) | + ((pkCStr[4] & 0x3F) << 6) | + (pkCStr[5] & 0x3F)); + pkCStr += 6; + } + + // Step 2: Append to output string + if ( ((CodePoint >= 0) && (CodePoint <= 0xD7FF)) || + ((CodePoint >= 0xE000) && (CodePoint <= 0xFFFF)) ) + out.Append((wchar_t) (CodePoint & 0xFFFF)); + } + + return out; +} + +// ************ TWideString ************ +TWideString::TWideString(const char* pkText) +{ + *this = TString(pkText).ToUTF16(); +} + +TWideString::TWideString(const TString& rkText) +{ + *this = rkText.ToUTF16(); +} + +TString TWideString::ToUTF8() const +{ + return "UTF16 to UTF8 currently unsupported"; +} diff --git a/Common/TString.h b/Common/TString.h new file mode 100644 index 00000000..a34f8ecd --- /dev/null +++ b/Common/TString.h @@ -0,0 +1,667 @@ +#ifndef TSTRING_H +#define TSTRING_H + +#include "types.h" +#include + +#include +#include +#include +#include +#include + +/* This is a string class which is essentially a wrapper around std::basic_string. + * The reason for this is because there are a lot of string functions I use very + * frequently that std::string is missing and this is more convenient than creating + * all these functions externally. I've chosen to remove access to the default + * std::basic_string functions and replace them with a custom API for consistency. + * + * Most of the file contains an implementation for a template base class, TBasicString. + * Afterwards we define the following subclasses/typedefs: + * + * - TBasicString - TString + * - TBasicString - TWideString + * - std::list - TStringList + * - std::list - TWideStringList + * + * TString and TWideString have functions for converting between each other. For these + * functions, TString is expected to be encoded in UTF-8 and TWideString is expected to + * be encoded in UTF-16. + */ + +// ************ TBasicString ************ +template +class TBasicString +{ + typedef TBasicString _TString; + typedef std::list<_TString> _TStringList; + +protected: + std::basic_string mInternalString; + +public: + // Constructors + TBasicString() + : mInternalString() + { + } + + TBasicString(u32 size) + : mInternalString(size, 0) + { + } + + TBasicString(u32 size, CharType fill) + : mInternalString(size, fill) + { + } + + TBasicString(const CharType* pkText) + : mInternalString(pkText) + { + } + + TBasicString(const std::basic_string& rkText) + : mInternalString(rkText) + { + } + + // Data Accessors + inline const CharType* CString() const + { + return mInternalString.c_str(); + } + + inline CharType At(u32 pos) const + { +#if _DEBUG + if (Size() <= pos) + throw std::out_of_range("Invalid position passed to TBasicString::At()"); +#endif + return mInternalString.at(pos); + } + + inline CharType Front() const + { + return (Size() > 0 ? mInternalString[0] : 0); + } + + inline CharType Back() const + { + return (Size() > 0 ? mInternalString[Size() - 1] : 0); + } + + inline u32 Size() const + { + return mInternalString.size(); + } + + inline u32 Length() const + { + return Size(); + } + + inline u32 IndexOf(const CharType* pkCharacters) const + { + return (u32) mInternalString.find_first_of(pkCharacters); + } + + inline u32 LastIndexOf(const CharType* pkCharacters) const + { + return (u32) mInternalString.find_last_of(pkCharacters); + } + + // Modify String + inline _TString SubString(int startPos, int length) const + { + return mInternalString.substr(startPos, length); + } + + inline void Insert(u32 pos, CharType c) + { +#ifdef _DEBUG + if (Size() < pos) + throw std::out_of_range("Invalid pos passed to TBasicString::Insert(CharType)"); +#endif + mInternalString.insert(pos, 1, c); + } + + inline void Insert(u32 pos, const CharType* pkStr) + { +#ifdef _DEBUG + if (Size() < pos) + throw std::out_of_range("Invalid pos passed to TBasicString::Insert(const CharType*)"); +#endif + mInternalString.insert(pos, pkStr); + } + + inline void Insert(u32 pos, const _TString& rkStr) + { + Insert(pos, rkStr.CString()); + } + + inline void Append(CharType c) + { + mInternalString.append(1, c); + } + + inline void Append(const CharType* pkText) + { + mInternalString.append(pkText); + } + + inline void Append(const _TString& rkStr) + { + mInternalString.append(rkStr.CString()); + } + + inline void Prepend(CharType c) + { + Insert(0, c); + } + + inline void Prepend(const CharType* pkText) + { + Insert(0, pkText); + } + + inline void Prepend(const _TString& rkStr) + { + Insert(0, rkStr); + } + + _TString ToUpper() const + { + _TString out(Size()); + + for (u32 iChar = 0; iChar < Size(); iChar++) + { + CharType c = At(iChar); + + if (c >= 'a' && c <= 'z') + out[iChar] = c - 0x20; + else + out[iChar] = c; + } + + return out; + } + + _TString ToLower() const + { + _TString out(Size()); + + for (u32 iChar = 0; iChar < Size(); iChar++) + { + CharType c = At(iChar); + + if (c >= 'A' && c <= 'Z') + out[iChar] = c + 0x20; + else + out[iChar] = c; + } + + return out; + } + + _TString Trimmed() const + { + static bool _TString::IsWhitespace(CharType); + int start, end; + + for (u32 iChar = 0; iChar < Size(); iChar++) + { + if (!IsWhitespace(mInternalString[iChar])) + { + start = iChar; + break; + } + } + + for (int iChar = Size() - 1; iChar >= 0; iChar--) + { + if (!IsWhitespace(mInternalString[iChar])) + { + end = iChar + 1; + break; + } + } + + return SubString(start, end - start); + } + + inline _TString Truncate(u32 amount) const + { + return SubString(0, amount); + } + + inline _TString ChopFront(u32 amount) const + { + return SubString(amount, Size() - amount); + } + + inline _TString ChopBack(u32 amount) const + { + return SubString(0, Size() - amount); + } + + u32 Hash32() const + { + u32 hash = 0; + + for (u32 iChar = 0; iChar < Size(); iChar++) + { + hash += At(iChar); + hash *= 101; + } + + return hash; + } + + u64 Hash64() const + { + u64 hash = 0; + + for (u32 iChar = 0; iChar < Size(); iChar++) + { + hash += At(iChar); + hash *= 101; + } + + return hash; + } + + inline u32 ToInt32(int base = 10) const + { + return std::stoul(mInternalString, nullptr, base); + } + + inline u64 ToInt64(int base = 10) const + { + return std::stoull(mInternalString, nullptr, base); + } + + void ToInt128(CharType* pOut, int base = 16) const + { + // TODO: only works in base 16 + u64 part1 = std::stoull(mInternalString.substr(0, 16), nullptr, base); + u64 part2 = std::stoull(mInternalString.substr(16, 16), nullptr, base); + + if (IOUtil::SystemEndianness == IOUtil::LittleEndian) + { + IOUtil::SwapBytes(part1); + IOUtil::SwapBytes(part2); + } + + memcpy(pOut, &part1, 8); + memcpy(pOut + 8, &part2, 8); + } + + _TStringList Split(const CharType* pkTokens) const + { + _TStringList out; + u32 lastSplit = 0; + + // Iterate over all characters in the input string + for (u32 iChr = 0; iChr < Length(); iChr++) + { + // Check whether this character is one of the user-provided tokens + for (u32 iTok = 0; true; iTok++) + { + if (!pkTokens[iTok]) break; + + if (mInternalString[iChr] == pkTokens[iTok]) + { + // Token found - split string + if (iChr > lastSplit) + out.push_back(SubString(lastSplit, iChr - lastSplit)); + + lastSplit = iChr + 1; + break; + } + } + } + + // Add final string + if (lastSplit != Length()) + out.push_back(SubString(lastSplit, Length() - lastSplit)); + + return out; + } + + void EnsureEndsWith(CharType chr) + { + if (Back() != chr) + Append(chr); + } + + void EnsureEndsWith(const CharType* pkText) + { + if (!EndsWith(pkText)) + Append(pkText); + } + + // Check String + bool StartsWith(const _TString& str) const + { + if (Size() < str.Size()) + return false; + + return (SubString(0, str.Size()) == str); + } + + bool EndsWith(const _TString& str) const + { + if (Size() < str.Size()) + return false; + + return (SubString(Size() - str.Size(), str.Size()) == str); + } + + bool Contains(_TString str, bool caseSensitive = true) const + { + if (Size() < str.Size()) return false; + + _TString checkStr(caseSensitive ? *this : ToUpper()); + if (!caseSensitive) str = str.ToUpper(); + + int latestPossibleStart = Size() - str.Size(); + int match = 0; + + for (int iChr = 0; iChr < latestPossibleStart; iChr++) + { + // If the current character matches, increment match + if (checkStr.At(iChr) == str.At(match)) + match++; + + // Otherwise... + else + { + // We need to also compare this character to the first + // character of the string (unless we just did that) + if (match > 0) + iChr--; + + match = 0; + } + + // If we've matched the entire string, then we can return true + if (match == str.Size()) return true; + } + + return false; + } + + bool IsHexString(bool requirePrefix = false, u32 width = -1) const + { + _TString str(*this); + bool hasPrefix = str.StartsWith("0x"); + + // If we're required to match the prefix and prefix is missing, return false + if (requirePrefix && !hasPrefix) + return false; + + if (width == -1) + { + // If the string has the 0x prefix, remove it + if (hasPrefix) + str = str.ChopFront(2); + + // If we have a variable width then assign the width value to the string size + width = str.Size(); + } + + // If the string starts with the prefix and the length matches the string, remove the prefix + else if ((str.Size() == width + 2) && (hasPrefix)) + str = str.ChopFront(2); + + // By this point, the string size and the width should match. If they don't, return false. + if (str.Size() != width) return false; + + // Now we can finally check the actual string and make sure all the characters are valid hex characters. + for (u32 c = 0; c < width; c++) + { + char chr = str[c]; + if (!((chr >= '0') && (chr <= '9')) && + !((chr >= 'a') && (chr <= 'f')) && + !((chr >= 'A') && (chr <= 'F'))) + return false; + } + + return true; + } + + inline bool CaseInsensitiveCompare(const _TString& rkOther) const + { + return (ToUpper() == rkOther.ToUpper()); + } + + // Get Filename Components + _TString GetFileDirectory() const + { + size_t endPath = mInternalString.find_last_of("\\/"); + return SubString(0, endPath + 1); + } + + _TString GetFileName(bool withExtension = true) const + { + size_t endPath = mInternalString.find_last_of("\\/") + 1; + + if (withExtension) + { + return SubString(endPath, Size() - endPath); + } + + else + { + size_t endName = mInternalString.find_last_of("."); + return SubString(endPath, endName - endPath); + } + } + + _TString GetFileExtension() const + { + size_t endName = mInternalString.find_last_of("."); + return SubString(endName + 1, Size() - endName); + } + + _TString GetFilePathWithoutExtension() const + { + size_t endName = mInternalString.find_last_of("."); + return SubString(0, endName); + } + + // Operators + inline _TString& operator=(const CharType* pkText) + { + mInternalString = pkText; + return *this; + } + + inline _TString& operator=(const _TString& rkText) + { + mInternalString = rkText.mInternalString; + return *this; + } + + _TString operator+(const CharType* pkOther) const + { + size_t len = strlen(pkOther); + + _TString out(len + Size()); + memcpy(&out[0], mInternalString.data(), Size() * sizeof(CharType)); + memcpy(&out[Size()], pkOther, len * sizeof(CharType)); + return out; + } + + inline _TString operator+(const _TString& other) const + { + return (*this + other.CString()); + } + + inline void operator+=(const CharType* pkOther) + { + *this = *this + pkOther; + } + + inline void operator+=(const _TString& rkOther) + { + *this = *this + rkOther; + } + + inline friend _TString operator+(const CharType* pkLeft, const _TString& rkRight) + { + size_t len = strlen(pkLeft); + + _TString out(len + rkRight.Size()); + memcpy(&out[0], pkLeft, len * sizeof(CharType)); + memcpy(&out[len], rkRight.CString(), rkRight.Size() * sizeof(CharType)); + return out; + } + + inline CharType& operator[](int pos) + { + return mInternalString[pos]; + } + + inline const CharType& operator[](int pos) const + { + return mInternalString[pos]; + } + + inline bool operator==(const CharType *pkText) const + { + return strcmp(pkText, mInternalString.data()) == 0; + } + + inline bool operator!=(const CharType *pkText) const + { + return (!(*this == pkText)); + } + + inline bool operator==(const _TString& rkOther) const + { + return (strcmp(mInternalString.data(), rkOther.mInternalString.data()) == 0); + } + + inline bool operator!=(const _TString& rkOther) const + { + return (!(*this == rkOther)); + } + + inline friend bool operator==(const CharType *pText, const _TString& rkString) + { + return (rkString == pText); + } + + inline friend bool operator!=(const CharType *pText, const _TString& rkString) + { + return (rkString != pText); + } + + inline friend std::ostream& operator<<(std::ostream& rLeft, const _TString& rkRight) + { + rLeft << rkRight.mInternalString; + return rLeft; + } + + inline friend std::istream& operator>>(std::istream& rLeft, const _TString& rkRight) + { + rLeft >> rkRight.mInternalString; + return rLeft; + } + + // Static + static TBasicString FromInt32(u32 value, int base = 10) + { + std::basic_stringstream sstream; + sstream << std::setbase(base) << value; + return sstream.str(); + } + + static TBasicString FromInt64(u64 value, int base = 10) + { + std::basic_stringstream sstream; + sstream << std::setbase(base) << value; + return sstream.str(); + } + + static TBasicString HexString(unsigned char num, bool addPrefix = true, bool uppercase = false, int width = 0) + { + return HexString((unsigned long) num, addPrefix, uppercase, width); + } + + static TBasicString HexString(unsigned short num, bool addPrefix = true, bool uppercase = false, int width = 0) + { + return HexString((unsigned long) num, addPrefix, uppercase, width); + } + + static TBasicString HexString(unsigned long num, bool addPrefix = true, bool uppercase = false, int width = 0) + { + std::basic_stringstream sstream; + sstream << std::hex << std::setw(width) << std::setfill('0') << num; + + _TString str = sstream.str(); + if (uppercase) str = str.ToUpper(); + if (addPrefix) str.Prepend("0x"); + return str; + } + + static bool IsWhitespace(CharType c) + { + return ( (c == '\t') || + (c == '\n') || + (c == '\v') || + (c == '\f') || + (c == '\r') || + (c == ' ') ); + } +}; + +// ************ TString ************ +class TString : public TBasicString +{ +public: + TString() : TBasicString() {} + TString(size_t size) : TBasicString(size) {} + TString(size_t size, char fill) : TBasicString(size, fill) {} + TString(const char* pkText) : TBasicString(pkText) {} + TString(const std::string& rkText) : TBasicString(rkText) {} + TString(const TBasicString& rkStr) : TBasicString(rkStr) {} + TString(const wchar_t* pkText); + TString(const class TWideString& rkText); + + inline std::string ToStdString() + { + return mInternalString; + } + + class TWideString ToUTF16() const; +}; + +// ************ TWideString ************ +class TWideString : public TBasicString +{ +public: + TWideString() : TBasicString() {} + TWideString(u32 size) : TBasicString(size) {} + TWideString(u32 size, wchar_t fill) : TBasicString(size, fill) {} + TWideString(const wchar_t* pkText) : TBasicString(pkText) {} + TWideString(const std::wstring& rkText) : TBasicString(rkText) {} + TWideString(const TBasicString& rkStr) : TBasicString(rkStr) {} + TWideString(const char* pkText); + TWideString(const TString& rkText); + + inline std::wstring ToStdWString() + { + return mInternalString; + } + + class TString ToUTF8() const; +}; + +// ************ Typedefs ************ +typedef std::list> TStringList; +typedef std::list> TWideStringList; + +#endif // TSTRING_H