mirror of https://github.com/libAthena/athena.git
stripped-down utf8proc added
This commit is contained in:
parent
9c44d5f4de
commit
bca146dbfc
|
@ -17,7 +17,7 @@ set(ATHENA_VERSION
|
||||||
|
|
||||||
add_subdirectory(extern)
|
add_subdirectory(extern)
|
||||||
|
|
||||||
include_directories(include ${LZO_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR} ${YAML_INCLUDE_DIR})
|
include_directories(include ${LZO_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR})
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||||
endif()
|
endif()
|
||||||
|
@ -69,6 +69,8 @@ add_library(AthenaCore
|
||||||
include/gekko_support.h
|
include/gekko_support.h
|
||||||
include/Athena/DNA.hpp
|
include/Athena/DNA.hpp
|
||||||
include/Athena/DNAYaml.hpp
|
include/Athena/DNAYaml.hpp
|
||||||
|
include/yaml.h
|
||||||
|
include/utf8proc.h
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(AthenaSakura
|
add_library(AthenaSakura
|
||||||
|
|
|
@ -5,9 +5,7 @@ set(YAML_VERSION_STRING "${YAML_VERSION_MAJOR}.${YAML_VERSION_MINOR}.${YAML_VERS
|
||||||
|
|
||||||
file(GLOB SRC src/*.c)
|
file(GLOB SRC src/*.c)
|
||||||
|
|
||||||
include_directories(include win32)
|
include_directories(../../include)
|
||||||
add_library(AthenaLibYaml STATIC ${SRC} include/yaml.h)
|
add_library(AthenaLibYaml STATIC ${SRC})
|
||||||
|
|
||||||
set(YAML_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "YAML include path" FORCE)
|
|
||||||
#install(DIRECTORY include/ DESTINATION include/Athena COMPONENT yaml)
|
|
||||||
install(TARGETS AthenaLibYaml DESTINATION lib COMPONENT yaml)
|
install(TARGETS AthenaLibYaml DESTINATION lib COMPONENT yaml)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -7,15 +7,9 @@
|
||||||
* Any changes to the types or namespacing must be reflected in 'atdna/main.cpp'
|
* Any changes to the types or namespacing must be reflected in 'atdna/main.cpp'
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if _WIN32
|
|
||||||
#ifndef WIN32_LEAN_AND_MEAN
|
|
||||||
#define WIN32_LEAN_AND_MEAN 1
|
|
||||||
#endif
|
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <yaml.h>
|
#include <yaml.h>
|
||||||
|
#include <utf8proc.h>
|
||||||
#include "DNA.hpp"
|
#include "DNA.hpp"
|
||||||
|
|
||||||
namespace Athena
|
namespace Athena
|
||||||
|
@ -381,44 +375,40 @@ inline std::unique_ptr<YAMLNode> ValToNode(const char* val)
|
||||||
template <>
|
template <>
|
||||||
inline std::wstring NodeToVal(const YAMLNode* node)
|
inline std::wstring NodeToVal(const YAMLNode* node)
|
||||||
{
|
{
|
||||||
#if _WIN32
|
|
||||||
int len = MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), nullptr, 0);
|
|
||||||
std::wstring retval(len, L'\0');
|
|
||||||
MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), &retval[0], len);
|
|
||||||
return retval;
|
|
||||||
#else
|
|
||||||
std::wstring retval;
|
std::wstring retval;
|
||||||
retval.reserve(node->m_scalarString.length());
|
retval.reserve(node->m_scalarString.length());
|
||||||
const char* buf = node->m_scalarString.c_str();
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(node->m_scalarString.c_str());
|
||||||
std::mbstate_t state = {};
|
|
||||||
while (*buf)
|
while (*buf)
|
||||||
{
|
{
|
||||||
wchar_t wc;
|
utf8proc_int32_t wc;
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
retval += wc;
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
|
retval += wchar_t(wc);
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline std::unique_ptr<YAMLNode> ValToNode(const std::wstring& val)
|
inline std::unique_ptr<YAMLNode> ValToNode(const std::wstring& val)
|
||||||
{
|
{
|
||||||
YAMLNode* ret = new YAMLNode(YAML_SCALAR_NODE);
|
YAMLNode* ret = new YAMLNode(YAML_SCALAR_NODE);
|
||||||
#if _WIN32
|
|
||||||
int len = WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), nullptr, 0, nullptr, nullptr);
|
|
||||||
ret->m_scalarString.assign(len, '\0');
|
|
||||||
WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), &ret->m_scalarString[0], len, nullptr, nullptr);
|
|
||||||
#else
|
|
||||||
ret->m_scalarString.reserve(val.length());
|
ret->m_scalarString.reserve(val.length());
|
||||||
std::mbstate_t state = {};
|
|
||||||
for (wchar_t ch : val)
|
for (wchar_t ch : val)
|
||||||
{
|
{
|
||||||
char mb[MB_LEN_MAX];
|
utf8proc_uint8_t mb[4];
|
||||||
int c = std::wcrtomb(mb, ch, &state);
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
||||||
ret->m_scalarString.append(mb, c);
|
if (c < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while encoding");
|
||||||
|
return std::unique_ptr<YAMLNode>(ret);
|
||||||
|
}
|
||||||
|
ret->m_scalarString.append(reinterpret_cast<char*>(mb), c);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
return std::unique_ptr<YAMLNode>(ret);
|
return std::unique_ptr<YAMLNode>(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,9 @@
|
||||||
#ifndef ISTREAMREADER_HPP
|
#ifndef ISTREAMREADER_HPP
|
||||||
#define ISTREAMREADER_HPP
|
#define ISTREAMREADER_HPP
|
||||||
|
|
||||||
#if _WIN32
|
|
||||||
#ifndef WIN32_LEAN_AND_MEAN
|
|
||||||
#define WIN32_LEAN_AND_MEAN 1
|
|
||||||
#endif
|
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include "utf8proc.h"
|
||||||
#include "IStream.hpp"
|
#include "IStream.hpp"
|
||||||
|
|
||||||
namespace Athena
|
namespace Athena
|
||||||
|
@ -597,32 +591,10 @@ public:
|
||||||
*/
|
*/
|
||||||
inline std::string readWStringAsString(atInt32 fixedLen = -1)
|
inline std::string readWStringAsString(atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
#if _WIN32
|
|
||||||
std::wstring wstr;
|
|
||||||
atUint16 chr = readUint16();
|
|
||||||
|
|
||||||
atInt32 i;
|
|
||||||
for (i=0 ;; ++i)
|
|
||||||
{
|
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!chr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
wstr += chr;
|
|
||||||
chr = readUint16();
|
|
||||||
}
|
|
||||||
|
|
||||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
|
||||||
std::string retval(len, '\0');
|
|
||||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
|
||||||
#else
|
|
||||||
std::string retval;
|
std::string retval;
|
||||||
atUint16 chr = readUint16();
|
atUint16 chr = readUint16();
|
||||||
|
|
||||||
atInt32 i;
|
atInt32 i;
|
||||||
std::mbstate_t state = {};
|
|
||||||
for (i=0 ;; ++i)
|
for (i=0 ;; ++i)
|
||||||
{
|
{
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||||
|
@ -631,12 +603,18 @@ public:
|
||||||
if (!chr)
|
if (!chr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
char mb[MB_LEN_MAX];
|
utf8proc_uint8_t mb[4];
|
||||||
int c = std::wcrtomb(mb, chr, &state);
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||||
retval.append(mb, c);
|
if (c < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while encoding");
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
retval.append(reinterpret_cast<char*>(mb), c);
|
||||||
chr = readUint16();
|
chr = readUint16();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
if (fixedLen >= 0 && i < fixedLen)
|
if (fixedLen >= 0 && i < fixedLen)
|
||||||
seek(fixedLen - i);
|
seek(fixedLen - i);
|
||||||
|
|
||||||
|
@ -645,32 +623,10 @@ public:
|
||||||
|
|
||||||
inline std::string readWStringAsStringLittle(atInt32 fixedLen = -1)
|
inline std::string readWStringAsStringLittle(atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
#if _WIN32
|
|
||||||
std::wstring wstr;
|
|
||||||
atUint16 chr = readUint16Little();
|
|
||||||
|
|
||||||
atInt32 i;
|
|
||||||
for (i=0 ;; ++i)
|
|
||||||
{
|
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!chr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
wstr += chr;
|
|
||||||
chr = readUint16Little();
|
|
||||||
}
|
|
||||||
|
|
||||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
|
||||||
std::string retval(len, '\0');
|
|
||||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
|
||||||
#else
|
|
||||||
std::string retval;
|
std::string retval;
|
||||||
atUint16 chr = readUint16Little();
|
atUint16 chr = readUint16Little();
|
||||||
|
|
||||||
atInt32 i;
|
atInt32 i;
|
||||||
std::mbstate_t state = {};
|
|
||||||
for (i=0 ;; ++i)
|
for (i=0 ;; ++i)
|
||||||
{
|
{
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||||
|
@ -679,12 +635,18 @@ public:
|
||||||
if (!chr)
|
if (!chr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
char mb[MB_LEN_MAX];
|
utf8proc_uint8_t mb[4];
|
||||||
int c = std::wcrtomb(mb, chr, &state);
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||||
retval.append(mb, c);
|
if (c < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while encoding");
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
retval.append(reinterpret_cast<char*>(mb), c);
|
||||||
chr = readUint16Little();
|
chr = readUint16Little();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
if (fixedLen >= 0 && i < fixedLen)
|
if (fixedLen >= 0 && i < fixedLen)
|
||||||
seek(fixedLen - i);
|
seek(fixedLen - i);
|
||||||
|
|
||||||
|
@ -693,32 +655,10 @@ public:
|
||||||
|
|
||||||
inline std::string readWStringAsStringBig(atInt32 fixedLen = -1)
|
inline std::string readWStringAsStringBig(atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
#if _WIN32
|
|
||||||
std::wstring wstr;
|
|
||||||
atUint16 chr = readUint16Big();
|
|
||||||
|
|
||||||
atInt32 i;
|
|
||||||
for (i=0 ;; ++i)
|
|
||||||
{
|
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!chr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
wstr += chr;
|
|
||||||
chr = readUint16Big();
|
|
||||||
}
|
|
||||||
|
|
||||||
int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
|
|
||||||
std::string retval(len, '\0');
|
|
||||||
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
|
|
||||||
#else
|
|
||||||
std::string retval;
|
std::string retval;
|
||||||
atUint16 chr = readUint16Big();
|
atUint16 chr = readUint16Big();
|
||||||
|
|
||||||
atInt32 i;
|
atInt32 i;
|
||||||
std::mbstate_t state = {};
|
|
||||||
for (i = 0 ;; ++i)
|
for (i = 0 ;; ++i)
|
||||||
{
|
{
|
||||||
if (fixedLen >= 0 && i >= fixedLen - 1)
|
if (fixedLen >= 0 && i >= fixedLen - 1)
|
||||||
|
@ -727,12 +667,18 @@ public:
|
||||||
if (!chr)
|
if (!chr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
char mb[MB_LEN_MAX];
|
utf8proc_uint8_t mb[4];
|
||||||
int c = std::wcrtomb(mb, chr, &state);
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
|
||||||
retval.append(mb, c);
|
if (c < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while encoding");
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
retval.append(reinterpret_cast<char*>(mb), c);
|
||||||
chr = readUint16Big();
|
chr = readUint16Big();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
if (fixedLen >= 0 && i < fixedLen)
|
if (fixedLen >= 0 && i < fixedLen)
|
||||||
seek(fixedLen - i);
|
seek(fixedLen - i);
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,7 @@
|
||||||
#ifndef ISTREAMWRITER_HPP
|
#ifndef ISTREAMWRITER_HPP
|
||||||
#define ISTREAMWRITER_HPP
|
#define ISTREAMWRITER_HPP
|
||||||
|
|
||||||
#if _WIN32
|
#include "utf8proc.h"
|
||||||
#ifndef WIN32_LEAN_AND_MEAN
|
|
||||||
#define WIN32_LEAN_AND_MEAN 1
|
|
||||||
#endif
|
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "IStream.hpp"
|
#include "IStream.hpp"
|
||||||
|
|
||||||
namespace Athena
|
namespace Athena
|
||||||
|
@ -444,44 +438,21 @@ public:
|
||||||
inline void writeStringAsWString(const std::string& str, atInt32 fixedLen = -1)
|
inline void writeStringAsWString(const std::string& str, atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||||
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||||
#if _WIN32
|
|
||||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
|
||||||
std::wstring retval(len, L'\0');
|
|
||||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
|
||||||
if (fixedLen < 0)
|
|
||||||
{
|
|
||||||
for (wchar_t ch : retval)
|
|
||||||
{
|
|
||||||
if (ch != 0xFEFF)
|
|
||||||
writeUint16(ch);
|
|
||||||
}
|
|
||||||
writeUint16(0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
|
||||||
{
|
|
||||||
wchar_t wc = retval[i];
|
|
||||||
if (wc == 0xFEFF)
|
|
||||||
{
|
|
||||||
--i;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
writeUint16(wc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
const char* buf = tmpStr.c_str();
|
|
||||||
std::mbstate_t state = {};
|
|
||||||
if (fixedLen < 0)
|
if (fixedLen < 0)
|
||||||
{
|
{
|
||||||
while (*buf)
|
while (*buf)
|
||||||
{
|
{
|
||||||
wchar_t wc;
|
utf8proc_int32_t wc;
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
if (wc != 0xFEFF)
|
if (wc != 0xFEFF)
|
||||||
writeUint16(wc);
|
writeUint16(atUint16(wc));
|
||||||
}
|
}
|
||||||
writeUint16(0);
|
writeUint16(0);
|
||||||
}
|
}
|
||||||
|
@ -489,9 +460,17 @@ public:
|
||||||
{
|
{
|
||||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||||
{
|
{
|
||||||
wchar_t wc = 0;
|
utf8proc_int32_t wc = 0;
|
||||||
if (*buf)
|
if (*buf)
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
{
|
||||||
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
|
}
|
||||||
|
|
||||||
if (wc == 0xFEFF)
|
if (wc == 0xFEFF)
|
||||||
{
|
{
|
||||||
|
@ -499,53 +478,29 @@ public:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeUint16(wc);
|
writeUint16(atUint16(wc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeStringAsWStringLittle(const std::string& str, atInt32 fixedLen = -1)
|
inline void writeStringAsWStringLittle(const std::string& str, atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||||
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||||
#if _WIN32
|
|
||||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
|
||||||
std::wstring retval(len, L'\0');
|
|
||||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
|
||||||
if (fixedLen < 0)
|
|
||||||
{
|
|
||||||
for (wchar_t ch : retval)
|
|
||||||
{
|
|
||||||
if (ch != 0xFEFF)
|
|
||||||
writeUint16(ch);
|
|
||||||
}
|
|
||||||
writeUint16Little(0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (atInt32 i = 0; i<fixedLen; ++i)
|
|
||||||
{
|
|
||||||
wchar_t wc = retval[i];
|
|
||||||
if (wc == 0xFEFF)
|
|
||||||
{
|
|
||||||
--i;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
writeUint16Little(wc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
const char* buf = tmpStr.c_str();
|
|
||||||
std::mbstate_t state = {};
|
|
||||||
if (fixedLen < 0)
|
if (fixedLen < 0)
|
||||||
{
|
{
|
||||||
while (*buf)
|
while (*buf)
|
||||||
{
|
{
|
||||||
wchar_t wc;
|
utf8proc_int32_t wc;
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
if (wc != 0xFEFF)
|
if (wc != 0xFEFF)
|
||||||
writeUint16Little(wc);
|
writeUint16Little(atUint16(wc));
|
||||||
}
|
}
|
||||||
writeUint16Little(0);
|
writeUint16Little(0);
|
||||||
}
|
}
|
||||||
|
@ -553,9 +508,17 @@ public:
|
||||||
{
|
{
|
||||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||||
{
|
{
|
||||||
wchar_t wc = 0;
|
utf8proc_int32_t wc = 0;
|
||||||
if (*buf)
|
if (*buf)
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
{
|
||||||
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
|
}
|
||||||
|
|
||||||
if (wc == 0xFEFF)
|
if (wc == 0xFEFF)
|
||||||
{
|
{
|
||||||
|
@ -563,53 +526,29 @@ public:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeUint16Little(wc);
|
writeUint16Little(atUint16(wc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void writeStringAsWStringBig(const std::string& str, atInt32 fixedLen = -1)
|
inline void writeStringAsWStringBig(const std::string& str, atInt32 fixedLen = -1)
|
||||||
{
|
{
|
||||||
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
std::string tmpStr = "\xEF\xBB\xBF" + str;
|
||||||
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
|
||||||
#if _WIN32
|
|
||||||
int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
|
|
||||||
std::wstring retval(len, L'\0');
|
|
||||||
MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
|
|
||||||
if (fixedLen < 0)
|
|
||||||
{
|
|
||||||
for (wchar_t ch : retval)
|
|
||||||
{
|
|
||||||
if (ch != 0xFEFF)
|
|
||||||
writeUint16(ch);
|
|
||||||
}
|
|
||||||
writeUint16Big(0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (atInt32 i = 0; i<fixedLen; ++i)
|
|
||||||
{
|
|
||||||
wchar_t wc = retval[i];
|
|
||||||
if (wc == 0xFEFF)
|
|
||||||
{
|
|
||||||
--i;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
writeUint16Big(wc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
const char* buf = tmpStr.c_str();
|
|
||||||
std::mbstate_t state = {};
|
|
||||||
if (fixedLen < 0)
|
if (fixedLen < 0)
|
||||||
{
|
{
|
||||||
while (*buf)
|
while (*buf)
|
||||||
{
|
{
|
||||||
wchar_t wc;
|
utf8proc_int32_t wc;
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
if (wc != 0xFEFF)
|
if (wc != 0xFEFF)
|
||||||
writeUint16Big(wc);
|
writeUint16Big(atUint16(wc));
|
||||||
}
|
}
|
||||||
writeUint16Big(0);
|
writeUint16Big(0);
|
||||||
}
|
}
|
||||||
|
@ -617,9 +556,17 @@ public:
|
||||||
{
|
{
|
||||||
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
for (atInt32 i=0 ; i<fixedLen ; ++i)
|
||||||
{
|
{
|
||||||
wchar_t wc = 0;
|
utf8proc_int32_t wc = 0;
|
||||||
if (*buf)
|
if (*buf)
|
||||||
buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
|
{
|
||||||
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atWarning("invalid UTF-8 character while decoding");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
|
}
|
||||||
|
|
||||||
if (wc == 0xFEFF)
|
if (wc == 0xFEFF)
|
||||||
{
|
{
|
||||||
|
@ -627,10 +574,9 @@ public:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeUint16Big(wc);
|
writeUint16Big(atUint16(wc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! \brief Writes a string to the buffer and advances the buffer.
|
/*! \brief Writes a string to the buffer and advances the buffer.
|
||||||
|
|
|
@ -0,0 +1,226 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @mainpage
|
||||||
|
*
|
||||||
|
* utf8proc is a free/open-source (MIT/expat licensed) C library
|
||||||
|
* providing Unicode normalization, case-folding, and other operations
|
||||||
|
* for strings in the UTF-8 encoding, supporting Unicode version
|
||||||
|
* 7.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
|
||||||
|
* for downloads and other information, or the source code on github
|
||||||
|
* (https://github.com/JuliaLang/utf8proc).
|
||||||
|
*
|
||||||
|
* For the utf8proc API documentation, see: @ref utf8proc.h
|
||||||
|
*
|
||||||
|
* The features of utf8proc include:
|
||||||
|
*
|
||||||
|
* - Transformation of strings (@ref utf8proc_map) to:
|
||||||
|
* - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character)
|
||||||
|
* - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
|
||||||
|
* - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK)
|
||||||
|
* - case-folding (@ref UTF8PROC_CASEFOLD)
|
||||||
|
* - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
|
||||||
|
* - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
|
||||||
|
* - Character-width computation: @ref utf8proc_charwidth
|
||||||
|
* - Classification of characters by Unicode category: @ref utf8proc_category and @ref utf8proc_category_string
|
||||||
|
* - Encode (@ref utf8proc_encode_char) and decode (@ref utf8proc_iterate) Unicode codepoints to/from UTF-8.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @file */
|
||||||
|
|
||||||
|
#ifndef UTF8PROC_H
#define UTF8PROC_H

/** @name API version
 *
 * The utf8proc API version MAJOR.MINOR.PATCH, following
 * semantic-versioning rules (http://semver.org) based on API
 * compatibility.
 *
 * This is also returned at runtime by @ref utf8proc_version; however, the
 * runtime version may append a string like "-dev" to the version number
 * for prerelease versions. (This stripped-down header does not declare
 * utf8proc_version itself; only the macros below are available.)
 *
 * @note The shared-library version number in the Makefile may be different,
 *       being based on ABI compatibility rather than API compatibility.
 */
/** @{ */
/** The MAJOR version number (increased when backwards API compatibility is broken). */
#define UTF8PROC_VERSION_MAJOR 1
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
#define UTF8PROC_VERSION_MINOR 3
/** The PATCH version (increased for fixes that do not change the API). */
#define UTF8PROC_VERSION_PATCH 0
/** @} */

#include <stdlib.h>
#include <sys/types.h>
#ifdef _MSC_VER
/* MSVC branch: fixed-width and size types are spelled out by hand. */
typedef signed char utf8proc_int8_t;
typedef unsigned char utf8proc_uint8_t;
typedef short utf8proc_int16_t;
typedef unsigned short utf8proc_uint16_t;
typedef int utf8proc_int32_t;
typedef unsigned int utf8proc_uint32_t;
#  ifdef _WIN64
typedef __int64 utf8proc_ssize_t;
typedef unsigned __int64 utf8proc_size_t;
#  else
typedef int utf8proc_ssize_t;
typedef unsigned int utf8proc_size_t;
#  endif
#  ifndef __cplusplus
typedef unsigned char utf8proc_bool;
enum {false, true};
#  else
typedef bool utf8proc_bool;
#  endif
#else
/* Every other compiler: take the types from the standard headers. */
#  include <stdbool.h>
#  include <inttypes.h>
typedef int8_t utf8proc_int8_t;
typedef uint8_t utf8proc_uint8_t;
typedef int16_t utf8proc_int16_t;
typedef uint16_t utf8proc_uint16_t;
typedef int32_t utf8proc_int32_t;
typedef uint32_t utf8proc_uint32_t;
typedef size_t utf8proc_size_t;
typedef ssize_t utf8proc_ssize_t;
typedef bool utf8proc_bool;
#endif
#include <limits.h>

/** @name Error codes
 * Error codes being returned by almost all functions.
 */
/** @{ */
/** Memory could not be allocated. */
#define UTF8PROC_ERROR_NOMEM -1
/** The given string is too long to be processed. */
#define UTF8PROC_ERROR_OVERFLOW -2
/** The given string is not a legal UTF-8 string. */
#define UTF8PROC_ERROR_INVALIDUTF8 -3
/** The @ref UTF8PROC_REJECTNA flag was set and an unassigned codepoint was found. */
#define UTF8PROC_ERROR_NOTASSIGNED -4
/** Invalid options have been used. */
#define UTF8PROC_ERROR_INVALIDOPTS -5
/** @} */

/** True when `ch` is a UTF-8 continuation byte (bit pattern 10xxxxxx). */
#define UTF8PROC_cont(ch)  (((ch) & 0xc0) == 0x80)

/**
 * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
 * The maximum number of bytes read is `strlen`, unless `strlen` is
 * negative (in which case up to 4 bytes are read).
 *
 * With a negative `strlen` the continuation checks stop at the first byte
 * that is not a continuation byte, so a NUL-terminated buffer is never read
 * past its terminator.
 *
 * If a valid codepoint could be read, it is stored in the variable
 * pointed to by `dst`, otherwise that variable will be set to -1.
 * In case of success, the number of bytes read is returned; otherwise, a
 * negative error code is returned. A `strlen` of 0 returns 0 with `*dst`
 * set to -1.
 */
static inline utf8proc_ssize_t utf8proc_iterate(
  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) {
  utf8proc_uint32_t uc;
  const utf8proc_uint8_t *end;

  *dst = -1;
  if (!strlen) return 0;
  end = str + ((strlen < 0) ? 4 : strlen);
  uc = *str++;
  if (uc < 0x80) {
    *dst = (utf8proc_int32_t)uc;
    return 1;
  }
  // Must be between 0xc2 and 0xf4 inclusive to be valid
  // (unsigned wrap-around makes anything below 0xc2 compare as huge)
  if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
  if (uc < 0xe0) {         // 2-byte sequence
    // Must have a continuation byte that lies inside the caller's range;
    // without the bounds check a lead byte at the very end of the buffer
    // would make us read one byte past `strlen`.
    if ((str >= end) || !UTF8PROC_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
    *dst = (utf8proc_int32_t)(((uc & 0x1f)<<6) | (*str & 0x3f));
    return 2;
  }
  if (uc < 0xf0) {        // 3-byte sequence
    if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
      return UTF8PROC_ERROR_INVALIDUTF8;
    // Check for surrogate chars
    if (uc == 0xed && *str > 0x9f)
      return UTF8PROC_ERROR_INVALIDUTF8;
    uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
    // Reject overlong encodings of codepoints that fit in two bytes
    if (uc < 0x800)
      return UTF8PROC_ERROR_INVALIDUTF8;
    *dst = (utf8proc_int32_t)uc;
    return 3;
  }
  // 4-byte sequence
  // Must have 3 valid continuation characters
  if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
    return UTF8PROC_ERROR_INVALIDUTF8;
  // Make sure in correct range (0x10000 - 0x10ffff)
  if (uc == 0xf0) {
    if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
  } else if (uc == 0xf4) {
    if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
  }
  *dst = (utf8proc_int32_t)(((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f));
  return 4;
}

/**
 * Encodes the codepoint as a UTF-8 string in the byte array pointed
 * to by `dst`. This array must be at least 4 bytes long.
 *
 * In case of success the number of bytes written is returned, and
 * otherwise 0 is returned. The result is never negative, so callers
 * must test for `== 0` (not `< 0`) to detect a codepoint that is
 * negative or above 0x10FFFF.
 *
 * This function does not check whether `uc` is valid Unicode.
 */
static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
  if (uc < 0x00) {
    return 0;
  } else if (uc < 0x80) {
    dst[0] = (utf8proc_uint8_t)uc;
    return 1;
  } else if (uc < 0x800) {
    dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
    dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
    return 2;
  // Note: we allow encoding 0xd800-0xdfff here, so as not to change
  // the API, however, these are actually invalid in UTF-8
  } else if (uc < 0x10000) {
    dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
    dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
    return 3;
  } else if (uc < 0x110000) {
    dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
    dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
    dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
    dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
    return 4;
  } else return 0;
}

#endif
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "Athena/SkywardSwordQuest.hpp"
|
#include "Athena/SkywardSwordQuest.hpp"
|
||||||
#include "Athena/Checksums.hpp"
|
#include "Athena/Checksums.hpp"
|
||||||
|
#include "utf8proc.h"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
namespace Athena
|
namespace Athena
|
||||||
|
@ -72,7 +73,7 @@ void SkywardSwordQuest::setPlayerName(const std::string& name)
|
||||||
if (name.length() > 8)
|
if (name.length() > 8)
|
||||||
atDebug("WARNING: name cannot be greater than 8 characters, automatically truncating");
|
atDebug("WARNING: name cannot be greater than 8 characters, automatically truncating");
|
||||||
|
|
||||||
const char* buf = name.c_str();
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(name.c_str());
|
||||||
for (atUint32 i = 0; i < 8; i++)
|
for (atUint32 i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
atUint16& c = *(atUint16*)(m_data.get() + priv::NAME_OFFSET + (i * 2));
|
atUint16& c = *(atUint16*)(m_data.get() + priv::NAME_OFFSET + (i * 2));
|
||||||
|
@ -83,9 +84,15 @@ void SkywardSwordQuest::setPlayerName(const std::string& name)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
wchar_t wc;
|
utf8proc_int32_t wc;
|
||||||
buf += std::mbtowc(&wc, buf, MB_CUR_MAX);
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
||||||
c = wc;
|
if (len < 0)
|
||||||
|
{
|
||||||
|
atError("invalid UTF-8 string");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf += len;
|
||||||
|
c = atUint16(wc);
|
||||||
utility::BigUint16(c);
|
utility::BigUint16(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -102,9 +109,10 @@ std::string SkywardSwordQuest::playerName() const
|
||||||
break;
|
break;
|
||||||
|
|
||||||
utility::BigUint16(c);
|
utility::BigUint16(c);
|
||||||
char mb[4];
|
utf8proc_uint8_t mb[4];
|
||||||
int cs = std::wctomb(mb, c);
|
utf8proc_ssize_t cs = utf8proc_encode_char(utf8proc_int32_t(c), mb);
|
||||||
val.append(mb, cs);
|
if (cs)
|
||||||
|
val.append(reinterpret_cast<char*>(mb), cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
|
|
Loading…
Reference in New Issue