#ifndef TSTRING_H #define TSTRING_H #include "types.h" #include #include #include #include #include #include /* This is a string class which is essentially a wrapper around std::basic_string. * The reason for this is because there are a lot of string functions I use very * frequently that std::string is missing and this is more convenient than creating * all these functions externally. I've chosen to remove access to the default * std::basic_string functions and replace them with a custom API for consistency. * * Most of the file contains an implementation for a template base class, TBasicString. * Afterwards we define the following subclasses/typedefs: * * - TBasicString - TString * - TBasicString - TWideString * - std::list - TStringList * - std::list - TWideStringList * * TString and TWideString have functions for converting between each other. For these * functions, TString is expected to be encoded in UTF-8 and TWideString is expected to * be encoded in UTF-16. */ // ************ TBasicString ************ template class TBasicString { typedef TBasicString _TString; typedef std::list<_TString> _TStringList; protected: std::basic_string mInternalString; public: // Constructors TBasicString() : mInternalString() { } TBasicString(u32 size) : mInternalString(size, 0) { } TBasicString(u32 size, CharType fill) : mInternalString(size, fill) { } TBasicString(const CharType* pkText) : mInternalString(pkText) { } TBasicString(const std::basic_string& rkText) : mInternalString(rkText) { } // Data Accessors inline const CharType* CString() const { return mInternalString.c_str(); } inline CharType At(u32 pos) const { #if _DEBUG if (Size() <= pos) throw std::out_of_range("Invalid position passed to TBasicString::At()"); #endif return mInternalString.at(pos); } inline CharType Front() const { return (Size() > 0 ? mInternalString[0] : 0); } inline CharType Back() const { return (Size() > 0 ? mInternalString[Size() - 1] : 0); } inline u32 Size() const { return mInternalString.size(); } inline u32 Length() const { return Size(); } inline u32 IndexOf(const CharType* pkCharacters) const { return (u32) mInternalString.find_first_of(pkCharacters); } inline u32 LastIndexOf(const CharType* pkCharacters) const { return (u32) mInternalString.find_last_of(pkCharacters); } // Modify String inline _TString SubString(int startPos, int length) const { return mInternalString.substr(startPos, length); } inline void Insert(u32 pos, CharType c) { #ifdef _DEBUG if (Size() < pos) throw std::out_of_range("Invalid pos passed to TBasicString::Insert(CharType)"); #endif mInternalString.insert(pos, 1, c); } inline void Insert(u32 pos, const CharType* pkStr) { #ifdef _DEBUG if (Size() < pos) throw std::out_of_range("Invalid pos passed to TBasicString::Insert(const CharType*)"); #endif mInternalString.insert(pos, pkStr); } inline void Insert(u32 pos, const _TString& rkStr) { Insert(pos, rkStr.CString()); } inline void Append(CharType c) { mInternalString.append(1, c); } inline void Append(const CharType* pkText) { mInternalString.append(pkText); } inline void Append(const _TString& rkStr) { mInternalString.append(rkStr.CString()); } inline void Prepend(CharType c) { Insert(0, c); } inline void Prepend(const CharType* pkText) { Insert(0, pkText); } inline void Prepend(const _TString& rkStr) { Insert(0, rkStr); } _TString ToUpper() const { _TString out(Size()); for (u32 iChar = 0; iChar < Size(); iChar++) { CharType c = At(iChar); if (c >= 'a' && c <= 'z') out[iChar] = c - 0x20; else out[iChar] = c; } return out; } _TString ToLower() const { _TString out(Size()); for (u32 iChar = 0; iChar < Size(); iChar++) { CharType c = At(iChar); if (c >= 'A' && c <= 'Z') out[iChar] = c + 0x20; else out[iChar] = c; } return out; } _TString Trimmed() const { static bool _TString::IsWhitespace(CharType); int start, end; for (u32 iChar = 0; iChar < Size(); iChar++) { if (!IsWhitespace(mInternalString[iChar])) { start = iChar; break; } } for (int iChar = Size() - 1; iChar >= 0; iChar--) { if (!IsWhitespace(mInternalString[iChar])) { end = iChar + 1; break; } } return SubString(start, end - start); } inline _TString Truncate(u32 amount) const { return SubString(0, amount); } inline _TString ChopFront(u32 amount) const { return SubString(amount, Size() - amount); } inline _TString ChopBack(u32 amount) const { return SubString(0, Size() - amount); } u32 Hash32() const { u32 hash = 0; for (u32 iChar = 0; iChar < Size(); iChar++) { hash += At(iChar); hash *= 101; } return hash; } u64 Hash64() const { u64 hash = 0; for (u32 iChar = 0; iChar < Size(); iChar++) { hash += At(iChar); hash *= 101; } return hash; } inline u32 ToInt32(int base = 10) const { return std::stoul(mInternalString, nullptr, base); } inline u64 ToInt64(int base = 10) const { return std::stoull(mInternalString, nullptr, base); } void ToInt128(CharType* pOut, int base = 16) const { // TODO: only works in base 16 u64 part1 = std::stoull(mInternalString.substr(0, 16), nullptr, base); u64 part2 = std::stoull(mInternalString.substr(16, 16), nullptr, base); if (IOUtil::SystemEndianness == IOUtil::LittleEndian) { IOUtil::SwapBytes(part1); IOUtil::SwapBytes(part2); } memcpy(pOut, &part1, 8); memcpy(pOut + 8, &part2, 8); } _TStringList Split(const CharType* pkTokens) const { _TStringList out; u32 lastSplit = 0; // Iterate over all characters in the input string for (u32 iChr = 0; iChr < Length(); iChr++) { // Check whether this character is one of the user-provided tokens for (u32 iTok = 0; true; iTok++) { if (!pkTokens[iTok]) break; if (mInternalString[iChr] == pkTokens[iTok]) { // Token found - split string if (iChr > lastSplit) out.push_back(SubString(lastSplit, iChr - lastSplit)); lastSplit = iChr + 1; break; } } } // Add final string if (lastSplit != Length()) out.push_back(SubString(lastSplit, Length() - lastSplit)); return out; } void EnsureEndsWith(CharType chr) { if (Back() != chr) Append(chr); } void EnsureEndsWith(const CharType* pkText) { if (!EndsWith(pkText)) Append(pkText); } // Check String bool StartsWith(const _TString& str) const { if (Size() < str.Size()) return false; return (SubString(0, str.Size()) == str); } bool EndsWith(const _TString& str) const { if (Size() < str.Size()) return false; return (SubString(Size() - str.Size(), str.Size()) == str); } bool Contains(_TString str, bool caseSensitive = true) const { if (Size() < str.Size()) return false; _TString checkStr(caseSensitive ? *this : ToUpper()); if (!caseSensitive) str = str.ToUpper(); int latestPossibleStart = Size() - str.Size(); int match = 0; for (int iChr = 0; iChr < latestPossibleStart; iChr++) { // If the current character matches, increment match if (checkStr.At(iChr) == str.At(match)) match++; // Otherwise... else { // We need to also compare this character to the first // character of the string (unless we just did that) if (match > 0) iChr--; match = 0; } // If we've matched the entire string, then we can return true if (match == str.Size()) return true; } return false; } bool IsHexString(bool requirePrefix = false, u32 width = -1) const { _TString str(*this); bool hasPrefix = str.StartsWith("0x"); // If we're required to match the prefix and prefix is missing, return false if (requirePrefix && !hasPrefix) return false; if (width == -1) { // If the string has the 0x prefix, remove it if (hasPrefix) str = str.ChopFront(2); // If we have a variable width then assign the width value to the string size width = str.Size(); } // If the string starts with the prefix and the length matches the string, remove the prefix else if ((str.Size() == width + 2) && (hasPrefix)) str = str.ChopFront(2); // By this point, the string size and the width should match. If they don't, return false. if (str.Size() != width) return false; // Now we can finally check the actual string and make sure all the characters are valid hex characters. for (u32 c = 0; c < width; c++) { char chr = str[c]; if (!((chr >= '0') && (chr <= '9')) && !((chr >= 'a') && (chr <= 'f')) && !((chr >= 'A') && (chr <= 'F'))) return false; } return true; } inline bool CaseInsensitiveCompare(const _TString& rkOther) const { return (ToUpper() == rkOther.ToUpper()); } // Get Filename Components _TString GetFileDirectory() const { size_t endPath = mInternalString.find_last_of("\\/"); return SubString(0, endPath + 1); } _TString GetFileName(bool withExtension = true) const { size_t endPath = mInternalString.find_last_of("\\/") + 1; if (withExtension) { return SubString(endPath, Size() - endPath); } else { size_t endName = mInternalString.find_last_of("."); return SubString(endPath, endName - endPath); } } _TString GetFileExtension() const { size_t endName = mInternalString.find_last_of("."); return SubString(endName + 1, Size() - endName); } _TString GetFilePathWithoutExtension() const { size_t endName = mInternalString.find_last_of("."); return SubString(0, endName); } // Operators inline _TString& operator=(const CharType* pkText) { mInternalString = pkText; return *this; } inline _TString& operator=(const _TString& rkText) { mInternalString = rkText.mInternalString; return *this; } _TString operator+(const CharType* pkOther) const { size_t len = strlen(pkOther); _TString out(len + Size()); memcpy(&out[0], mInternalString.data(), Size() * sizeof(CharType)); memcpy(&out[Size()], pkOther, len * sizeof(CharType)); return out; } inline _TString operator+(const _TString& other) const { return (*this + other.CString()); } inline void operator+=(const CharType* pkOther) { *this = *this + pkOther; } inline void operator+=(const _TString& rkOther) { *this = *this + rkOther; } inline friend _TString operator+(const CharType* pkLeft, const _TString& rkRight) { size_t len = strlen(pkLeft); _TString out(len + rkRight.Size()); memcpy(&out[0], pkLeft, len * sizeof(CharType)); memcpy(&out[len], rkRight.CString(), rkRight.Size() * sizeof(CharType)); return out; } inline CharType& operator[](int pos) { return mInternalString[pos]; } inline const CharType& operator[](int pos) const { return mInternalString[pos]; } inline bool operator==(const CharType *pkText) const { return strcmp(pkText, mInternalString.data()) == 0; } inline bool operator!=(const CharType *pkText) const { return (!(*this == pkText)); } inline bool operator==(const _TString& rkOther) const { return (strcmp(mInternalString.data(), rkOther.mInternalString.data()) == 0); } inline bool operator!=(const _TString& rkOther) const { return (!(*this == rkOther)); } inline friend bool operator==(const CharType *pText, const _TString& rkString) { return (rkString == pText); } inline friend bool operator!=(const CharType *pText, const _TString& rkString) { return (rkString != pText); } inline friend std::ostream& operator<<(std::ostream& rLeft, const _TString& rkRight) { rLeft << rkRight.mInternalString; return rLeft; } inline friend std::istream& operator>>(std::istream& rLeft, const _TString& rkRight) { rLeft >> rkRight.mInternalString; return rLeft; } // Static static TBasicString FromInt32(u32 value, int base = 10) { std::basic_stringstream sstream; sstream << std::setbase(base) << value; return sstream.str(); } static TBasicString FromInt64(u64 value, int base = 10) { std::basic_stringstream sstream; sstream << std::setbase(base) << value; return sstream.str(); } static TBasicString HexString(unsigned char num, bool addPrefix = true, bool uppercase = false, int width = 0) { return HexString((unsigned long) num, addPrefix, uppercase, width); } static TBasicString HexString(unsigned short num, bool addPrefix = true, bool uppercase = false, int width = 0) { return HexString((unsigned long) num, addPrefix, uppercase, width); } static TBasicString HexString(unsigned long num, bool addPrefix = true, bool uppercase = false, int width = 0) { std::basic_stringstream sstream; sstream << std::hex << std::setw(width) << std::setfill('0') << num; _TString str = sstream.str(); if (uppercase) str = str.ToUpper(); if (addPrefix) str.Prepend("0x"); return str; } static bool IsWhitespace(CharType c) { return ( (c == '\t') || (c == '\n') || (c == '\v') || (c == '\f') || (c == '\r') || (c == ' ') ); } }; // ************ TString ************ class TString : public TBasicString { public: TString() : TBasicString() {} TString(size_t size) : TBasicString(size) {} TString(size_t size, char fill) : TBasicString(size, fill) {} TString(const char* pkText) : TBasicString(pkText) {} TString(const std::string& rkText) : TBasicString(rkText) {} TString(const TBasicString& rkStr) : TBasicString(rkStr) {} TString(const wchar_t* pkText); TString(const class TWideString& rkText); inline std::string ToStdString() { return mInternalString; } class TWideString ToUTF16() const; }; // ************ TWideString ************ class TWideString : public TBasicString { public: TWideString() : TBasicString() {} TWideString(u32 size) : TBasicString(size) {} TWideString(u32 size, wchar_t fill) : TBasicString(size, fill) {} TWideString(const wchar_t* pkText) : TBasicString(pkText) {} TWideString(const std::wstring& rkText) : TBasicString(rkText) {} TWideString(const TBasicString& rkStr) : TBasicString(rkStr) {} TWideString(const char* pkText); TWideString(const TString& rkText); inline std::wstring ToStdWString() { return mInternalString; } class TString ToUTF8() const; }; // ************ Typedefs ************ typedef std::list> TStringList; typedef std::list> TWideStringList; #endif // TSTRING_H