Created and wrote core functionality for TString class

This commit is contained in:
parax0 2015-11-23 07:43:05 -07:00
parent 763d4b8b0a
commit 53408ffac9
2 changed files with 772 additions and 0 deletions

105
Common/TString.cpp Normal file
View File

@ -0,0 +1,105 @@
#include "TString.h"
#include <FileIO/IOUtil.h>
// ************ TString ************
// Builds a narrow string from a null-terminated wide string by wrapping it in a
// TWideString and taking the result of its ToUTF8() conversion.
TString::TString(const wchar_t* pkText)
    : TBasicString<char>(TWideString(pkText).ToUTF8())
{
}
// Builds a narrow string from a TWideString using the result of its ToUTF8() conversion.
TString::TString(const TWideString& rkText)
    : TBasicString<char>(rkText.ToUTF8())
{
}
// Decodes this string as UTF-8 and returns the result re-encoded as UTF-16.
//  - Decoding stops at the first null character.
//  - Bytes that cannot start a sequence (stray continuation bytes, 0xFE, 0xFF) are skipped.
//  - A sequence that is cut short by a non-continuation byte is discarded; decoding resumes
//    at the byte that interrupted it, so the terminator is never read past.
//  - U+0000..U+D7FF and U+E000..U+FFFF are emitted as one UTF-16 unit, U+10000..U+10FFFF as a
//    surrogate pair. Any other decoded value (surrogate code points, values above U+10FFFF
//    produced by the legacy 5/6-byte forms) is dropped.
TWideString TString::ToUTF16() const
{
    TWideString out;
    const unsigned char *pkCur = reinterpret_cast<const unsigned char*>(CString());

    while (pkCur[0])
    {
        // Step 1: decode UTF-8 code point
        // The lead byte gives the payload bits and the number of continuation bytes.
        // A 32-bit accumulator is used because wchar_t may only be 16 bits wide.
        const unsigned char lead = pkCur[0];
        u32 codePoint = 0;
        u32 numTrailing = 0;

        if ((lead & 0x80) == 0x00)      // One byte
        {
            codePoint = lead;
            numTrailing = 0;
        }
        else if ((lead & 0xE0) == 0xC0) // Two bytes
        {
            codePoint = lead & 0x1F;
            numTrailing = 1;
        }
        else if ((lead & 0xF0) == 0xE0) // Three bytes
        {
            codePoint = lead & 0x0F;
            numTrailing = 2;
        }
        else if ((lead & 0xF8) == 0xF0) // Four bytes
        {
            codePoint = lead & 0x07;
            numTrailing = 3;
        }
        else if ((lead & 0xFC) == 0xF8) // Five bytes
        {
            codePoint = lead & 0x03;
            numTrailing = 4;
        }
        else if ((lead & 0xFE) == 0xFC) // Six bytes
        {
            codePoint = lead & 0x01;
            numTrailing = 5;
        }
        else
        {
            // Not a valid lead byte - skip it so the loop always makes progress
            pkCur++;
            continue;
        }

        pkCur++;

        // Every continuation byte must look like 10xxxxxx. The null terminator does not,
        // so this check also prevents reading beyond the end of the buffer.
        bool complete = true;

        for (u32 iByte = 0; iByte < numTrailing; iByte++)
        {
            if ((pkCur[0] & 0xC0) != 0x80)
            {
                complete = false;
                break;
            }

            codePoint = (codePoint << 6) | (pkCur[0] & 0x3F);
            pkCur++;
        }

        if (!complete)
            continue;

        // Step 2: Append to output string
        if ( (codePoint <= 0xD7FF) ||
             ((codePoint >= 0xE000) && (codePoint <= 0xFFFF)) )
        {
            out.Append(static_cast<wchar_t>(codePoint));
        }
        else if ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))
        {
            // Outside the BMP - encode as high/low surrogate pair
            const u32 offset = codePoint - 0x10000;
            out.Append(static_cast<wchar_t>(0xD800 | (offset >> 10)));
            out.Append(static_cast<wchar_t>(0xDC00 | (offset & 0x3FF)));
        }
    }

    return out;
}
// ************ TWideString ************
// Builds a wide string from a null-terminated narrow string by wrapping it in a
// TString and taking the result of its ToUTF16() conversion.
TWideString::TWideString(const char* pkText)
    : TBasicString<wchar_t>(TString(pkText).ToUTF16())
{
}
// Builds a wide string from a TString using the result of its ToUTF16() conversion.
TWideString::TWideString(const TString& rkText)
    : TBasicString<wchar_t>(rkText.ToUTF16())
{
}
// Decodes this string as UTF-16 and returns the result re-encoded as UTF-8.
//  - Decoding stops at the first null character.
//  - A high surrogate followed by a low surrogate is combined into one code point.
//  - Unpaired surrogates and values above U+10FFFF (possible when wchar_t is wider than
//    16 bits) are dropped.
TString TWideString::ToUTF8() const
{
    TString out;
    const wchar_t *pkCur = CString();

    while (pkCur[0])
    {
        // Step 1: decode UTF-16 code point
        u32 codePoint = static_cast<u32>(pkCur[0]);
        pkCur++;

        if ((codePoint >= 0xD800) && (codePoint <= 0xDBFF))
        {
            // High surrogate - only valid when a low surrogate follows. If the next unit is
            // the terminator it fails the range check and the loop ends normally.
            const u32 low = static_cast<u32>(pkCur[0]);

            if ((low < 0xDC00) || (low > 0xDFFF))
                continue;

            codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
            pkCur++;
        }
        else if ((codePoint >= 0xDC00) && (codePoint <= 0xDFFF))
        {
            // Low surrogate without a preceding high surrogate
            continue;
        }

        // Step 2: Append to output string
        if (codePoint <= 0x7F)
        {
            out.Append(static_cast<char>(codePoint));
        }
        else if (codePoint <= 0x7FF)
        {
            out.Append(static_cast<char>(0xC0 | (codePoint >> 6)));
            out.Append(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint <= 0xFFFF)
        {
            out.Append(static_cast<char>(0xE0 | (codePoint >> 12)));
            out.Append(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
            out.Append(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint <= 0x10FFFF)
        {
            out.Append(static_cast<char>(0xF0 | (codePoint >> 18)));
            out.Append(static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)));
            out.Append(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
            out.Append(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
    }

    return out;
}

667
Common/TString.h Normal file
View File

@ -0,0 +1,667 @@
#ifndef TSTRING_H
#define TSTRING_H
#include "types.h"
#include <FileIO/IOUtil.h>
#include <cstring>
#include <iomanip>
#include <list>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
/* This is a string class which is essentially a wrapper around std::basic_string.
* The reason for this is that std::string is missing a lot of string functions I use
* very frequently, and this is more convenient than creating all these functions
* externally. I've chosen to remove access to the default std::basic_string functions
* and replace them with a custom API for consistency.
*
* Most of the file contains an implementation for a template base class, TBasicString.
* Afterwards we define the following subclasses/typedefs:
*
* - TBasicString<char> - TString
* - TBasicString<wchar_t> - TWideString
* - std::list<TString> - TStringList
* - std::list<TWideString> - TWideStringList
*
* TString and TWideString have functions for converting between each other. For these
* functions, TString is expected to be encoded in UTF-8 and TWideString is expected to
* be encoded in UTF-16.
*/
// ************ TBasicString ************
template<class CharType>
class TBasicString
{
typedef TBasicString<CharType> _TString;
typedef std::list<_TString> _TStringList;
protected:
std::basic_string<CharType> mInternalString;
public:
// Constructors
TBasicString()
: mInternalString()
{
}
TBasicString(u32 size)
: mInternalString(size, 0)
{
}
TBasicString(u32 size, CharType fill)
: mInternalString(size, fill)
{
}
TBasicString(const CharType* pkText)
: mInternalString(pkText)
{
}
TBasicString(const std::basic_string<CharType>& rkText)
: mInternalString(rkText)
{
}
// Data Accessors
inline const CharType* CString() const
{
return mInternalString.c_str();
}
inline CharType At(u32 pos) const
{
#if _DEBUG
if (Size() <= pos)
throw std::out_of_range("Invalid position passed to TBasicString::At()");
#endif
return mInternalString.at(pos);
}
inline CharType Front() const
{
return (Size() > 0 ? mInternalString[0] : 0);
}
inline CharType Back() const
{
return (Size() > 0 ? mInternalString[Size() - 1] : 0);
}
inline u32 Size() const
{
return mInternalString.size();
}
inline u32 Length() const
{
return Size();
}
inline u32 IndexOf(const CharType* pkCharacters) const
{
return (u32) mInternalString.find_first_of(pkCharacters);
}
inline u32 LastIndexOf(const CharType* pkCharacters) const
{
return (u32) mInternalString.find_last_of(pkCharacters);
}
// Modify String
inline _TString SubString(int startPos, int length) const
{
return mInternalString.substr(startPos, length);
}
inline void Insert(u32 pos, CharType c)
{
#ifdef _DEBUG
if (Size() < pos)
throw std::out_of_range("Invalid pos passed to TBasicString::Insert(CharType)");
#endif
mInternalString.insert(pos, 1, c);
}
inline void Insert(u32 pos, const CharType* pkStr)
{
#ifdef _DEBUG
if (Size() < pos)
throw std::out_of_range("Invalid pos passed to TBasicString::Insert(const CharType*)");
#endif
mInternalString.insert(pos, pkStr);
}
inline void Insert(u32 pos, const _TString& rkStr)
{
Insert(pos, rkStr.CString());
}
inline void Append(CharType c)
{
mInternalString.append(1, c);
}
inline void Append(const CharType* pkText)
{
mInternalString.append(pkText);
}
inline void Append(const _TString& rkStr)
{
mInternalString.append(rkStr.CString());
}
inline void Prepend(CharType c)
{
Insert(0, c);
}
inline void Prepend(const CharType* pkText)
{
Insert(0, pkText);
}
inline void Prepend(const _TString& rkStr)
{
Insert(0, rkStr);
}
_TString ToUpper() const
{
_TString out(Size());
for (u32 iChar = 0; iChar < Size(); iChar++)
{
CharType c = At(iChar);
if (c >= 'a' && c <= 'z')
out[iChar] = c - 0x20;
else
out[iChar] = c;
}
return out;
}
_TString ToLower() const
{
_TString out(Size());
for (u32 iChar = 0; iChar < Size(); iChar++)
{
CharType c = At(iChar);
if (c >= 'A' && c <= 'Z')
out[iChar] = c + 0x20;
else
out[iChar] = c;
}
return out;
}
_TString Trimmed() const
{
static bool _TString::IsWhitespace(CharType);
int start, end;
for (u32 iChar = 0; iChar < Size(); iChar++)
{
if (!IsWhitespace(mInternalString[iChar]))
{
start = iChar;
break;
}
}
for (int iChar = Size() - 1; iChar >= 0; iChar--)
{
if (!IsWhitespace(mInternalString[iChar]))
{
end = iChar + 1;
break;
}
}
return SubString(start, end - start);
}
inline _TString Truncate(u32 amount) const
{
return SubString(0, amount);
}
inline _TString ChopFront(u32 amount) const
{
return SubString(amount, Size() - amount);
}
inline _TString ChopBack(u32 amount) const
{
return SubString(0, Size() - amount);
}
u32 Hash32() const
{
u32 hash = 0;
for (u32 iChar = 0; iChar < Size(); iChar++)
{
hash += At(iChar);
hash *= 101;
}
return hash;
}
u64 Hash64() const
{
u64 hash = 0;
for (u32 iChar = 0; iChar < Size(); iChar++)
{
hash += At(iChar);
hash *= 101;
}
return hash;
}
inline u32 ToInt32(int base = 10) const
{
return std::stoul(mInternalString, nullptr, base);
}
inline u64 ToInt64(int base = 10) const
{
return std::stoull(mInternalString, nullptr, base);
}
void ToInt128(CharType* pOut, int base = 16) const
{
// TODO: only works in base 16
u64 part1 = std::stoull(mInternalString.substr(0, 16), nullptr, base);
u64 part2 = std::stoull(mInternalString.substr(16, 16), nullptr, base);
if (IOUtil::SystemEndianness == IOUtil::LittleEndian)
{
IOUtil::SwapBytes(part1);
IOUtil::SwapBytes(part2);
}
memcpy(pOut, &part1, 8);
memcpy(pOut + 8, &part2, 8);
}
_TStringList Split(const CharType* pkTokens) const
{
_TStringList out;
u32 lastSplit = 0;
// Iterate over all characters in the input string
for (u32 iChr = 0; iChr < Length(); iChr++)
{
// Check whether this character is one of the user-provided tokens
for (u32 iTok = 0; true; iTok++)
{
if (!pkTokens[iTok]) break;
if (mInternalString[iChr] == pkTokens[iTok])
{
// Token found - split string
if (iChr > lastSplit)
out.push_back(SubString(lastSplit, iChr - lastSplit));
lastSplit = iChr + 1;
break;
}
}
}
// Add final string
if (lastSplit != Length())
out.push_back(SubString(lastSplit, Length() - lastSplit));
return out;
}
void EnsureEndsWith(CharType chr)
{
if (Back() != chr)
Append(chr);
}
void EnsureEndsWith(const CharType* pkText)
{
if (!EndsWith(pkText))
Append(pkText);
}
// Check String
bool StartsWith(const _TString& str) const
{
if (Size() < str.Size())
return false;
return (SubString(0, str.Size()) == str);
}
bool EndsWith(const _TString& str) const
{
if (Size() < str.Size())
return false;
return (SubString(Size() - str.Size(), str.Size()) == str);
}
bool Contains(_TString str, bool caseSensitive = true) const
{
if (Size() < str.Size()) return false;
_TString checkStr(caseSensitive ? *this : ToUpper());
if (!caseSensitive) str = str.ToUpper();
int latestPossibleStart = Size() - str.Size();
int match = 0;
for (int iChr = 0; iChr < latestPossibleStart; iChr++)
{
// If the current character matches, increment match
if (checkStr.At(iChr) == str.At(match))
match++;
// Otherwise...
else
{
// We need to also compare this character to the first
// character of the string (unless we just did that)
if (match > 0)
iChr--;
match = 0;
}
// If we've matched the entire string, then we can return true
if (match == str.Size()) return true;
}
return false;
}
bool IsHexString(bool requirePrefix = false, u32 width = -1) const
{
_TString str(*this);
bool hasPrefix = str.StartsWith("0x");
// If we're required to match the prefix and prefix is missing, return false
if (requirePrefix && !hasPrefix)
return false;
if (width == -1)
{
// If the string has the 0x prefix, remove it
if (hasPrefix)
str = str.ChopFront(2);
// If we have a variable width then assign the width value to the string size
width = str.Size();
}
// If the string starts with the prefix and the length matches the string, remove the prefix
else if ((str.Size() == width + 2) && (hasPrefix))
str = str.ChopFront(2);
// By this point, the string size and the width should match. If they don't, return false.
if (str.Size() != width) return false;
// Now we can finally check the actual string and make sure all the characters are valid hex characters.
for (u32 c = 0; c < width; c++)
{
char chr = str[c];
if (!((chr >= '0') && (chr <= '9')) &&
!((chr >= 'a') && (chr <= 'f')) &&
!((chr >= 'A') && (chr <= 'F')))
return false;
}
return true;
}
inline bool CaseInsensitiveCompare(const _TString& rkOther) const
{
return (ToUpper() == rkOther.ToUpper());
}
// Get Filename Components
_TString GetFileDirectory() const
{
size_t endPath = mInternalString.find_last_of("\\/");
return SubString(0, endPath + 1);
}
_TString GetFileName(bool withExtension = true) const
{
size_t endPath = mInternalString.find_last_of("\\/") + 1;
if (withExtension)
{
return SubString(endPath, Size() - endPath);
}
else
{
size_t endName = mInternalString.find_last_of(".");
return SubString(endPath, endName - endPath);
}
}
_TString GetFileExtension() const
{
size_t endName = mInternalString.find_last_of(".");
return SubString(endName + 1, Size() - endName);
}
_TString GetFilePathWithoutExtension() const
{
size_t endName = mInternalString.find_last_of(".");
return SubString(0, endName);
}
// Operators
inline _TString& operator=(const CharType* pkText)
{
mInternalString = pkText;
return *this;
}
inline _TString& operator=(const _TString& rkText)
{
mInternalString = rkText.mInternalString;
return *this;
}
_TString operator+(const CharType* pkOther) const
{
size_t len = strlen(pkOther);
_TString out(len + Size());
memcpy(&out[0], mInternalString.data(), Size() * sizeof(CharType));
memcpy(&out[Size()], pkOther, len * sizeof(CharType));
return out;
}
inline _TString operator+(const _TString& other) const
{
return (*this + other.CString());
}
inline void operator+=(const CharType* pkOther)
{
*this = *this + pkOther;
}
inline void operator+=(const _TString& rkOther)
{
*this = *this + rkOther;
}
inline friend _TString operator+(const CharType* pkLeft, const _TString& rkRight)
{
size_t len = strlen(pkLeft);
_TString out(len + rkRight.Size());
memcpy(&out[0], pkLeft, len * sizeof(CharType));
memcpy(&out[len], rkRight.CString(), rkRight.Size() * sizeof(CharType));
return out;
}
inline CharType& operator[](int pos)
{
return mInternalString[pos];
}
inline const CharType& operator[](int pos) const
{
return mInternalString[pos];
}
inline bool operator==(const CharType *pkText) const
{
return strcmp(pkText, mInternalString.data()) == 0;
}
inline bool operator!=(const CharType *pkText) const
{
return (!(*this == pkText));
}
inline bool operator==(const _TString& rkOther) const
{
return (strcmp(mInternalString.data(), rkOther.mInternalString.data()) == 0);
}
inline bool operator!=(const _TString& rkOther) const
{
return (!(*this == rkOther));
}
inline friend bool operator==(const CharType *pText, const _TString& rkString)
{
return (rkString == pText);
}
inline friend bool operator!=(const CharType *pText, const _TString& rkString)
{
return (rkString != pText);
}
inline friend std::ostream& operator<<(std::ostream& rLeft, const _TString& rkRight)
{
rLeft << rkRight.mInternalString;
return rLeft;
}
inline friend std::istream& operator>>(std::istream& rLeft, const _TString& rkRight)
{
rLeft >> rkRight.mInternalString;
return rLeft;
}
// Static
static TBasicString<CharType> FromInt32(u32 value, int base = 10)
{
std::basic_stringstream<CharType> sstream;
sstream << std::setbase(base) << value;
return sstream.str();
}
static TBasicString<CharType> FromInt64(u64 value, int base = 10)
{
std::basic_stringstream<CharType> sstream;
sstream << std::setbase(base) << value;
return sstream.str();
}
static TBasicString<CharType> HexString(unsigned char num, bool addPrefix = true, bool uppercase = false, int width = 0)
{
return HexString((unsigned long) num, addPrefix, uppercase, width);
}
static TBasicString<CharType> HexString(unsigned short num, bool addPrefix = true, bool uppercase = false, int width = 0)
{
return HexString((unsigned long) num, addPrefix, uppercase, width);
}
static TBasicString<CharType> HexString(unsigned long num, bool addPrefix = true, bool uppercase = false, int width = 0)
{
std::basic_stringstream<CharType> sstream;
sstream << std::hex << std::setw(width) << std::setfill('0') << num;
_TString str = sstream.str();
if (uppercase) str = str.ToUpper();
if (addPrefix) str.Prepend("0x");
return str;
}
static bool IsWhitespace(CharType c)
{
return ( (c == '\t') ||
(c == '\n') ||
(c == '\v') ||
(c == '\f') ||
(c == '\r') ||
(c == ' ') );
}
};
// ************ TString ************
// Narrow (char) string. The conversions to and from TWideString treat the contents as UTF-8.
class TString : public TBasicString<char>
{
public:
    TString() : TBasicString<char>() {}
    TString(size_t size) : TBasicString<char>(size) {}
    TString(size_t size, char fill) : TBasicString<char>(size, fill) {}
    TString(const char* pkText) : TBasicString<char>(pkText) {}
    TString(const std::string& rkText) : TBasicString<char>(rkText) {}
    TString(const TBasicString<char>& rkStr) : TBasicString<char>(rkStr) {}

    // Conversions from wide text; defined out of line because TWideString is still incomplete here.
    TString(const wchar_t* pkText);
    TString(const class TWideString& rkText);

    // Returns a copy of the underlying std::string. Const so it can be used on const strings.
    inline std::string ToStdString() const
    {
        return mInternalString;
    }

    // Returns this string converted to a wide string.
    class TWideString ToUTF16() const;
};
// ************ TWideString ************
// Wide (wchar_t) string. The conversions to and from TString treat the contents as UTF-16.
class TWideString : public TBasicString<wchar_t>
{
public:
    TWideString() : TBasicString<wchar_t>() {}
    TWideString(u32 size) : TBasicString<wchar_t>(size) {}
    TWideString(u32 size, wchar_t fill) : TBasicString<wchar_t>(size, fill) {}
    TWideString(const wchar_t* pkText) : TBasicString<wchar_t>(pkText) {}
    TWideString(const std::wstring& rkText) : TBasicString<wchar_t>(rkText) {}
    TWideString(const TBasicString<wchar_t>& rkStr) : TBasicString<wchar_t>(rkStr) {}

    // Conversions from narrow text; defined out of line.
    TWideString(const char* pkText);
    TWideString(const TString& rkText);

    // Returns a copy of the underlying std::wstring. Const so it can be used on const strings.
    inline std::wstring ToStdWString() const
    {
        return mInternalString;
    }

    // Returns this string converted to a narrow string.
    class TString ToUTF8() const;
};
// ************ Typedefs ************
// List of narrow strings. The element type is the TBasicString<char> base class rather than
// TString, which matches the list type returned by TBasicString<char>::Split().
typedef std::list<TBasicString<char>> TStringList;
// Wide-character counterpart of TStringList.
typedef std::list<TBasicString<wchar_t>> TWideStringList;
#endif // TSTRING_H