mirror of
https://github.com/libAthena/athena.git
synced 2025-12-18 09:25:22 +00:00
stripped-down utf8proc added
This commit is contained in:
@@ -7,15 +7,9 @@
|
||||
* Any changes to the types or namespacing must be reflected in 'atdna/main.cpp'
|
||||
*/
|
||||
|
||||
#if _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <yaml.h>
|
||||
#include <utf8proc.h>
|
||||
#include "DNA.hpp"
|
||||
|
||||
namespace Athena
|
||||
@@ -381,44 +375,40 @@ inline std::unique_ptr<YAMLNode> ValToNode(const char* val)
|
||||
template <>
|
||||
inline std::wstring NodeToVal(const YAMLNode* node)
|
||||
{
|
||||
#if _WIN32
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), nullptr, 0);
|
||||
std::wstring retval(len, L'\0');
|
||||
MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), &retval[0], len);
|
||||
return retval;
|
||||
#else
|
||||
std::wstring retval;
|
||||
retval.reserve(node->m_scalarString.length());
|
||||
const char* buf = node->m_scalarString.c_str();
|
||||
std::mbstate_t state = {};
|
||||
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(node->m_scalarString.c_str());
|
||||
while (*buf)
|
||||
{
|
||||
wchar_t wc;
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
retval += wc;
|
||||
utf8proc_int32_t wc;
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return retval;
|
||||
}
|
||||
buf += len;
|
||||
retval += wchar_t(wc);
|
||||
}
|
||||
return retval;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::unique_ptr<YAMLNode> ValToNode(const std::wstring& val)
|
||||
{
|
||||
YAMLNode* ret = new YAMLNode(YAML_SCALAR_NODE);
|
||||
#if _WIN32
|
||||
int len = WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), nullptr, 0, nullptr, nullptr);
|
||||
ret->m_scalarString.assign(len, '\0');
|
||||
WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), &ret->m_scalarString[0], len, nullptr, nullptr);
|
||||
#else
|
||||
ret->m_scalarString.reserve(val.length());
|
||||
std::mbstate_t state = {};
|
||||
for (wchar_t ch : val)
|
||||
{
|
||||
char mb[MB_LEN_MAX];
|
||||
int c = std::wcrtomb(mb, ch, &state);
|
||||
ret->m_scalarString.append(mb, c);
|
||||
utf8proc_uint8_t mb[4];
|
||||
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
||||
if (c < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while encoding");
|
||||
return std::unique_ptr<YAMLNode>(ret);
|
||||
}
|
||||
ret->m_scalarString.append(reinterpret_cast<char*>(mb), c);
|
||||
}
|
||||
#endif
|
||||
return std::unique_ptr<YAMLNode>(ret);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,9 @@
|
||||
#ifndef ISTREAMREADER_HPP
|
||||
#define ISTREAMREADER_HPP
|
||||
|
||||
#if _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "utf8proc.h"
|
||||
#include "IStream.hpp"
|
||||
|
||||
namespace Athena
|
||||
@@ -597,32 +591,10 @@ public:
|
||||
*/
|
||||
inline std::string readWStringAsString(atInt32 fixedLen = -1)
|
||||
{
|
||||
#if _WIN32
|
||||
std::wstring wstr;
|
||||
atUint16 chr = readUint16();
|
||||
|
||||
atInt32 i;
|
||||
for (i=0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
break;
|
||||
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
wstr += chr;
|
||||
chr = readUint16();
|
||||
}
|
||||
|
||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
||||
std::string retval(len, '\0');
|
||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
||||
#else
|
||||
std::string retval;
|
||||
atUint16 chr = readUint16();
|
||||
|
||||
atInt32 i;
|
||||
std::mbstate_t state = {};
|
||||
for (i=0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
@@ -631,12 +603,18 @@ public:
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
char mb[MB_LEN_MAX];
|
||||
int c = std::wcrtomb(mb, chr, &state);
|
||||
retval.append(mb, c);
|
||||
utf8proc_uint8_t mb[4];
|
||||
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||
if (c < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while encoding");
|
||||
return retval;
|
||||
}
|
||||
|
||||
retval.append(reinterpret_cast<char*>(mb), c);
|
||||
chr = readUint16();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fixedLen >= 0 && i < fixedLen)
|
||||
seek(fixedLen - i);
|
||||
|
||||
@@ -645,32 +623,10 @@ public:
|
||||
|
||||
inline std::string readWStringAsStringLittle(atInt32 fixedLen = -1)
|
||||
{
|
||||
#if _WIN32
|
||||
std::wstring wstr;
|
||||
atUint16 chr = readUint16Little();
|
||||
|
||||
atInt32 i;
|
||||
for (i=0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
break;
|
||||
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
wstr += chr;
|
||||
chr = readUint16Little();
|
||||
}
|
||||
|
||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
||||
std::string retval(len, '\0');
|
||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
||||
#else
|
||||
std::string retval;
|
||||
atUint16 chr = readUint16Little();
|
||||
|
||||
atInt32 i;
|
||||
std::mbstate_t state = {};
|
||||
for (i=0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
@@ -679,12 +635,18 @@ public:
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
char mb[MB_LEN_MAX];
|
||||
int c = std::wcrtomb(mb, chr, &state);
|
||||
retval.append(mb, c);
|
||||
utf8proc_uint8_t mb[4];
|
||||
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||
if (c < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while encoding");
|
||||
return retval;
|
||||
}
|
||||
|
||||
retval.append(reinterpret_cast<char*>(mb), c);
|
||||
chr = readUint16Little();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fixedLen >= 0 && i < fixedLen)
|
||||
seek(fixedLen - i);
|
||||
|
||||
@@ -693,32 +655,10 @@ public:
|
||||
|
||||
inline std::string readWStringAsStringBig(atInt32 fixedLen = -1)
|
||||
{
|
||||
#if _WIN32
|
||||
std::wstring wstr;
|
||||
atUint16 chr = readUint16Big();
|
||||
|
||||
atInt32 i;
|
||||
for (i=0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
break;
|
||||
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
wstr += chr;
|
||||
chr = readUint16Big();
|
||||
}
|
||||
|
||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
||||
std::string retval(len, '\0');
|
||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
||||
#else
|
||||
std::string retval;
|
||||
atUint16 chr = readUint16Big();
|
||||
|
||||
atInt32 i;
|
||||
std::mbstate_t state = {};
|
||||
for (i = 0 ;; ++i)
|
||||
{
|
||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||
@@ -727,12 +667,18 @@ public:
|
||||
if (!chr)
|
||||
break;
|
||||
|
||||
char mb[MB_LEN_MAX];
|
||||
int c = std::wcrtomb(mb, chr, &state);
|
||||
retval.append(mb, c);
|
||||
utf8proc_uint8_t mb[4];
|
||||
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||
if (c < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while encoding");
|
||||
return retval;
|
||||
}
|
||||
|
||||
retval.append(reinterpret_cast<char*>(mb), c);
|
||||
chr = readUint16Big();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fixedLen >= 0 && i < fixedLen)
|
||||
seek(fixedLen - i);
|
||||
|
||||
|
||||
@@ -1,13 +1,7 @@
|
||||
#ifndef ISTREAMWRITER_HPP
|
||||
#define ISTREAMWRITER_HPP
|
||||
|
||||
#if _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include "utf8proc.h"
|
||||
#include "IStream.hpp"
|
||||
|
||||
namespace Athena
|
||||
@@ -444,44 +438,21 @@ public:
|
||||
inline void writeStringAsWString(const std::string& str, atInt32 fixedLen = -1)
|
||||
{
|
||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||
|
||||
#if _WIN32
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
||||
std::wstring retval(len, L'\0');
|
||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
for (wchar_t ch : retval)
|
||||
{
|
||||
if (ch != 0xFEFF)
|
||||
writeUint16(ch);
|
||||
}
|
||||
writeUint16(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||
{
|
||||
wchar_t wc = retval[i];
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
--i;
|
||||
continue;
|
||||
}
|
||||
writeUint16(wc);
|
||||
}
|
||||
}
|
||||
#else
|
||||
const char* buf = tmpStr.c_str();
|
||||
std::mbstate_t state = {};
|
||||
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
while (*buf)
|
||||
{
|
||||
wchar_t wc;
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
utf8proc_int32_t wc;
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
if (wc != 0xFEFF)
|
||||
writeUint16(wc);
|
||||
writeUint16(atUint16(wc));
|
||||
}
|
||||
writeUint16(0);
|
||||
}
|
||||
@@ -489,9 +460,17 @@ public:
|
||||
{
|
||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||
{
|
||||
wchar_t wc = 0;
|
||||
utf8proc_int32_t wc = 0;
|
||||
if (*buf)
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
{
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
}
|
||||
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
@@ -499,53 +478,29 @@ public:
|
||||
continue;
|
||||
}
|
||||
|
||||
writeUint16(wc);
|
||||
writeUint16(atUint16(wc));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void writeStringAsWStringLittle(const std::string& str, atInt32 fixedLen = -1)
|
||||
{
|
||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||
|
||||
#if _WIN32
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
||||
std::wstring retval(len, L'\0');
|
||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
for (wchar_t ch : retval)
|
||||
{
|
||||
if (ch != 0xFEFF)
|
||||
writeUint16(ch);
|
||||
}
|
||||
writeUint16Little(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (atInt32 i = 0; i<fixedLen; ++i)
|
||||
{
|
||||
wchar_t wc = retval[i];
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
--i;
|
||||
continue;
|
||||
}
|
||||
writeUint16Little(wc);
|
||||
}
|
||||
}
|
||||
#else
|
||||
const char* buf = tmpStr.c_str();
|
||||
std::mbstate_t state = {};
|
||||
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
while (*buf)
|
||||
{
|
||||
wchar_t wc;
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
utf8proc_int32_t wc;
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
if (wc != 0xFEFF)
|
||||
writeUint16Little(wc);
|
||||
writeUint16Little(atUint16(wc));
|
||||
}
|
||||
writeUint16Little(0);
|
||||
}
|
||||
@@ -553,9 +508,17 @@ public:
|
||||
{
|
||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||
{
|
||||
wchar_t wc = 0;
|
||||
utf8proc_int32_t wc = 0;
|
||||
if (*buf)
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
{
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
}
|
||||
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
@@ -563,53 +526,29 @@ public:
|
||||
continue;
|
||||
}
|
||||
|
||||
writeUint16Little(wc);
|
||||
writeUint16Little(atUint16(wc));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void writeStringAsWStringBig(const std::string& str, atInt32 fixedLen = -1)
|
||||
{
|
||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||
|
||||
#if _WIN32
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
||||
std::wstring retval(len, L'\0');
|
||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
for (wchar_t ch : retval)
|
||||
{
|
||||
if (ch != 0xFEFF)
|
||||
writeUint16(ch);
|
||||
}
|
||||
writeUint16Big(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (atInt32 i = 0; i<fixedLen; ++i)
|
||||
{
|
||||
wchar_t wc = retval[i];
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
--i;
|
||||
continue;
|
||||
}
|
||||
writeUint16Big(wc);
|
||||
}
|
||||
}
|
||||
#else
|
||||
const char* buf = tmpStr.c_str();
|
||||
std::mbstate_t state = {};
|
||||
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||
if (fixedLen < 0)
|
||||
{
|
||||
while (*buf)
|
||||
{
|
||||
wchar_t wc;
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
utf8proc_int32_t wc;
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
if (wc != 0xFEFF)
|
||||
writeUint16Big(wc);
|
||||
writeUint16Big(atUint16(wc));
|
||||
}
|
||||
writeUint16Big(0);
|
||||
}
|
||||
@@ -617,9 +556,17 @@ public:
|
||||
{
|
||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||
{
|
||||
wchar_t wc = 0;
|
||||
utf8proc_int32_t wc = 0;
|
||||
if (*buf)
|
||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
||||
{
|
||||
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||
if (len < 0)
|
||||
{
|
||||
atWarning("invalid UTF-8 character while decoding");
|
||||
return;
|
||||
}
|
||||
buf += len;
|
||||
}
|
||||
|
||||
if (wc == 0xFEFF)
|
||||
{
|
||||
@@ -627,10 +574,9 @@ public:
|
||||
continue;
|
||||
}
|
||||
|
||||
writeUint16Big(wc);
|
||||
writeUint16Big(atUint16(wc));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! \brief Writes an string to the buffer and advances the buffer.
|
||||
|
||||
Reference in New Issue
Block a user