2019-09-04 21:22:05 +00:00
|
|
|
#include <logvisor/logvisor.hpp>
|
2021-05-02 04:01:56 +00:00
|
|
|
#include <athena/utf8proc.h>
|
2015-06-09 22:19:59 +00:00
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
namespace hecl {
|
2018-01-10 06:16:18 +00:00
|
|
|
// File-local logvisor module ("hecl-wsconv") used by the conversion routines in
// this file to report encode/decode warnings.
static logvisor::Module Log("hecl-wsconv");
|
2015-06-09 22:19:59 +00:00
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
std::string WideToUTF8(std::wstring_view src) {
|
|
|
|
std::string retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
for (wchar_t ch : src) {
|
|
|
|
utf8proc_uint8_t mb[4];
|
|
|
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
|
|
|
if (c < 0) {
|
2020-04-11 22:48:11 +00:00
|
|
|
Log.report(logvisor::Warning, FMT_STRING("invalid UTF-8 character while encoding"));
|
2018-12-08 05:18:42 +00:00
|
|
|
return retval;
|
2015-08-25 07:02:10 +00:00
|
|
|
}
|
2018-12-08 05:18:42 +00:00
|
|
|
retval.append(reinterpret_cast<char*>(mb), c);
|
|
|
|
}
|
|
|
|
return retval;
|
2015-06-09 22:19:59 +00:00
|
|
|
}
|
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
std::string Char16ToUTF8(std::u16string_view src) {
|
|
|
|
std::string retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
for (char16_t ch : src) {
|
|
|
|
utf8proc_uint8_t mb[4];
|
|
|
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
|
|
|
if (c < 0) {
|
2020-04-11 22:48:11 +00:00
|
|
|
Log.report(logvisor::Warning, FMT_STRING("invalid UTF-8 character while encoding"));
|
2018-12-08 05:18:42 +00:00
|
|
|
return retval;
|
2017-01-24 07:40:09 +00:00
|
|
|
}
|
2018-12-08 05:18:42 +00:00
|
|
|
retval.append(reinterpret_cast<char*>(mb), c);
|
|
|
|
}
|
|
|
|
return retval;
|
2017-01-24 07:40:09 +00:00
|
|
|
}
|
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
std::wstring UTF8ToWide(std::string_view src) {
|
|
|
|
std::wstring retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.data());
|
|
|
|
while (*buf) {
|
|
|
|
utf8proc_int32_t wc;
|
|
|
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
|
|
|
if (len < 0) {
|
2020-04-11 22:48:11 +00:00
|
|
|
Log.report(logvisor::Warning, FMT_STRING("invalid UTF-8 character while decoding"));
|
2018-12-08 05:18:42 +00:00
|
|
|
return retval;
|
2015-08-25 07:02:10 +00:00
|
|
|
}
|
2018-12-08 05:18:42 +00:00
|
|
|
buf += len;
|
|
|
|
retval += wchar_t(wc);
|
|
|
|
}
|
|
|
|
return retval;
|
2015-06-09 22:19:59 +00:00
|
|
|
}
|
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
std::u16string UTF8ToChar16(std::string_view src) {
|
|
|
|
std::u16string retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.data());
|
|
|
|
while (*buf) {
|
|
|
|
utf8proc_int32_t wc;
|
|
|
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
|
|
|
if (len < 0) {
|
2020-04-11 22:48:11 +00:00
|
|
|
Log.report(logvisor::Warning, FMT_STRING("invalid UTF-8 character while decoding"));
|
2018-12-08 05:18:42 +00:00
|
|
|
return retval;
|
2017-01-24 07:40:09 +00:00
|
|
|
}
|
2018-12-08 05:18:42 +00:00
|
|
|
buf += len;
|
|
|
|
retval += char16_t(wc);
|
|
|
|
}
|
|
|
|
return retval;
|
2017-01-24 07:40:09 +00:00
|
|
|
}
|
|
|
|
|
2018-12-08 05:18:42 +00:00
|
|
|
} // namespace hecl
|