Use UTF-8 exclusively internally

This commit is contained in:
Luke Street 2021-06-28 18:58:22 -04:00
parent 1111fb4839
commit 4cf61b7d5e
12 changed files with 41 additions and 447 deletions

View File

@ -15,7 +15,6 @@ add_library(kabufuda STATIC
include/kabufuda/File.hpp lib/kabufuda/File.cpp
include/kabufuda/Util.hpp lib/kabufuda/Util.cpp
include/kabufuda/SRAM.hpp lib/kabufuda/SRAM.cpp
include/kabufuda/WideStringConvert.hpp lib/kabufuda/WideStringConvert.cpp
)
if(WIN32)

View File

@ -35,7 +35,7 @@ class AsyncIO {
public:
AsyncIO() = default;
explicit AsyncIO(SystemStringView filename, bool truncate = false);
explicit AsyncIO(std::string_view filename, bool truncate = false);
~AsyncIO();
AsyncIO(AsyncIO&& other);
AsyncIO& operator=(AsyncIO&& other);

View File

@ -102,7 +102,7 @@ class Card {
CardHeader m_ch;
CardHeader m_tmpCh;
SystemString m_filename;
std::string m_filename;
AsyncIO m_fileHandle;
std::array<Directory, 2> m_dirs;
std::array<BlockAllocationTable, 2> m_bats;
@ -450,7 +450,7 @@ public:
*
* @return ProbeResults structure.
*/
static ProbeResults probeCardFile(SystemStringView filename);
static ProbeResults probeCardFile(std::string_view filename);
/**
* @brief Writes any changes to the Card instance immediately to disk. <br />
@ -461,7 +461,7 @@ public:
/**
* @brief Opens card image (does nothing if currently open path matches).
*/
bool open(SystemStringView filepath);
bool open(std::string_view filepath);
/**
* @brief Commits changes to disk and closes host file.
@ -473,7 +473,7 @@ public:
*
* @return A view to the card's filename.
*/
SystemStringView cardFilename() const { return m_filename; }
std::string_view cardFilename() const { return m_filename; }
/**
* @brief Gets card-scope error state.

View File

@ -19,11 +19,7 @@
#define NOMINMAX
#endif
#include <Windows.h>
#include <cwchar>
#include "winsupport.hpp"
#if UNICODE
#define CARD_UCS2 1
#endif
#endif
#include <algorithm>
@ -32,8 +28,6 @@
#include <string>
#include <type_traits>
#include "kabufuda/WideStringConvert.hpp"
#undef bswap16
#undef bswap32
#undef bswap64
@ -177,72 +171,10 @@ constexpr double SBig(double val) { return val; }
#endif
#endif
#if CARD_UCS2
typedef wchar_t SystemChar;
inline size_t StrLen(const SystemChar* str) { return wcslen(str); }
typedef std::wstring SystemString;
typedef std::wstring_view SystemStringView;
inline void ToLower(SystemString& str) { std::transform(str.begin(), str.end(), str.begin(), towlower); }
inline void ToUpper(SystemString& str) { std::transform(str.begin(), str.end(), str.begin(), towupper); }
class SystemUTF8Conv {
std::string m_utf8;
public:
explicit SystemUTF8Conv(SystemStringView str) : m_utf8(WideToUTF8(str)) {}
std::string_view str() const { return m_utf8; }
const char* c_str() const { return m_utf8.c_str(); }
std::string operator+(std::string_view other) const { return m_utf8 + other.data(); }
};
inline std::string operator+(std::string_view lhs, const SystemUTF8Conv& rhs) { return std::string(lhs) + rhs.c_str(); }
class SystemStringConv {
std::wstring m_sys;
public:
explicit SystemStringConv(std::string_view str) : m_sys(UTF8ToWide(str)) {}
SystemStringView sys_str() const { return m_sys; }
const SystemChar* c_str() const { return m_sys.c_str(); }
std::wstring operator+(const std::wstring_view other) const { return m_sys + other.data(); }
};
inline std::wstring operator+(const std::wstring_view lhs, const SystemStringConv& rhs) {
return std::wstring(lhs) + rhs.c_str();
}
#ifndef _SYS_STR
#define _SYS_STR(val) L##val
#endif
typedef struct _stat Sstat;
#if _WIN32
using Sstat = struct ::_stat64;
#else
typedef char SystemChar;
inline size_t StrLen(const SystemChar* str) { return std::strlen(str); }
typedef std::string SystemString;
typedef std::string_view SystemStringView;
inline void ToLower(SystemString& str) { std::transform(str.begin(), str.end(), str.begin(), tolower); }
inline void ToUpper(SystemString& str) { std::transform(str.begin(), str.end(), str.begin(), toupper); }
class SystemUTF8Conv {
std::string_view m_utf8;
public:
explicit SystemUTF8Conv(SystemStringView str) : m_utf8(str) {}
std::string_view str() const { return m_utf8; }
const char* c_str() const { return m_utf8.data(); }
std::string operator+(std::string_view other) const { return std::string(m_utf8) + other.data(); }
};
inline std::string operator+(std::string_view lhs, const SystemUTF8Conv& rhs) { return std::string(lhs) + rhs.c_str(); }
class SystemStringConv {
SystemStringView m_sys;
public:
explicit SystemStringConv(std::string_view str) : m_sys(str) {}
SystemStringView sys_str() const { return m_sys; }
const SystemChar* c_str() const { return m_sys.data(); }
std::string operator+(std::string_view other) const { return std::string(m_sys) + other.data(); }
};
inline std::string operator+(std::string_view lhs, const SystemStringConv& rhs) {
return std::string(lhs) + rhs.c_str();
}
#ifndef _SYS_STR
#define _SYS_STR(val) val
#endif
typedef struct stat Sstat;
using SStat = struct stat;
#endif
uint64_t getGCTime();
@ -255,15 +187,32 @@ uint64_t getGCTime();
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#endif
inline int Stat(const SystemChar* path, Sstat* statOut) {
#if CARD_UCS2
size_t pos;
for (pos = 0; pos < 3 && path[pos] != L'\0'; ++pos) {}
if (pos == 2 && path[1] == L':') {
SystemChar fixPath[4] = {path[0], L':', L'/', L'\0'};
return _wstat(fixPath, statOut);
#if _WIN32
class WStringConv {
std::wstring m_sys;
public:
explicit WStringConv(std::string_view str) {
int len = MultiByteToWideChar(CP_UTF8, 0, str.data(), str.size(), nullptr, 0);
m_sys.assign(len, L'\0');
MultiByteToWideChar(CP_UTF8, 0, str.data(), str.size(), &m_sys[0], len);
}
return _wstat(path, statOut);
[[nodiscard]] std::wstring str() const { return m_sys; }
[[nodiscard]] const wchar_t* c_str() const { return m_sys.c_str(); }
};
#endif
inline int Stat(const char* path, Sstat* statOut) {
#if _WIN32
size_t pos;
WStringConv wpath(path);
const wchar_t* wpathP = wpath.c_str();
for (pos = 0; pos < 3 && wpathP[pos] != L'\0'; ++pos) {}
if (pos == 2 && wpathP[1] == L':') {
wchar_t fixPath[4] = {wpathP[0], L':', L'/', L'\0'};
return _wstat64(fixPath, statOut);
}
return _wstat64(wpath.c_str(), statOut);
#else
return stat(path, statOut);
#endif

View File

@ -1,8 +0,0 @@
#pragma once
#include <string>
namespace kabufuda {
std::string WideToUTF8(std::wstring_view src);
std::wstring UTF8ToWide(std::string_view src);
} // namespace kabufuda

View File

@ -1,308 +0,0 @@
/*
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @mainpage
*
* utf8proc is a free/open-source (MIT/expat licensed) C library
* providing Unicode normalization, case-folding, and other operations
* for strings in the UTF-8 encoding, supporting Unicode version
* 7.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
* for downloads and other information, or the source code on github
* (https://github.com/JuliaLang/utf8proc).
*
* For the utf8proc API documentation, see: @ref utf8proc.h
*
* The features of utf8proc include:
*
* - Transformation of strings (@ref utf8proc_map) to:
* - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character)
* - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
* - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK)
* - case-folding (@ref UTF8PROC_CASEFOLD)
* - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
* - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
* - Character-width computation: @ref utf8proc_charwidth
* - Classification of characters by Unicode category: @ref utf8proc_category and @ref utf8proc_category_string
* - Encode (@ref utf8proc_encode_char) and decode (@ref utf8proc_iterate) Unicode codepoints to/from UTF-8.
*/
/** @file */
#ifndef UTF8PROC_H
#define UTF8PROC_H
/** @name API version
*
* The utf8proc API version MAJOR.MINOR.PATCH, following
* semantic-versioning rules (http://semver.org) based on API
* compatibility.
*
* This is also returned at runtime by @ref utf8proc_version; however, the
* runtime version may append a string like "-dev" to the version number
* for prerelease versions.
*
* @note The shared-library version number in the Makefile may be different,
* being based on ABI compatibility rather than API compatibility.
*/
/** @{ */
/** The MAJOR version number (increased when backwards API compatibility is broken). */
#define UTF8PROC_VERSION_MAJOR 1
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
#define UTF8PROC_VERSION_MINOR 3
/** The PATCH version (increased for fixes that do not change the API). */
#define UTF8PROC_VERSION_PATCH 0
/** @} */
#include <stdlib.h>
#include <sys/types.h>
#ifdef _MSC_VER
typedef signed char utf8proc_int8_t;
typedef unsigned char utf8proc_uint8_t;
typedef short utf8proc_int16_t;
typedef unsigned short utf8proc_uint16_t;
typedef int utf8proc_int32_t;
typedef unsigned int utf8proc_uint32_t;
# ifdef _WIN64
typedef __int64 utf8proc_ssize_t;
typedef unsigned __int64 utf8proc_size_t;
# else
typedef int utf8proc_ssize_t;
typedef unsigned int utf8proc_size_t;
# endif
# ifndef __cplusplus
typedef unsigned char utf8proc_bool;
enum {false, true};
# else
typedef bool utf8proc_bool;
# endif
#else
#ifdef __cplusplus
# include <cstdbool>
# include <cinttypes>
#else
# include <stdbool.h>
# include <inttypes.h>
#endif
typedef int8_t utf8proc_int8_t;
typedef uint8_t utf8proc_uint8_t;
typedef int16_t utf8proc_int16_t;
typedef uint16_t utf8proc_uint16_t;
typedef int32_t utf8proc_int32_t;
typedef uint32_t utf8proc_uint32_t;
typedef size_t utf8proc_size_t;
typedef ssize_t utf8proc_ssize_t;
typedef bool utf8proc_bool;
#endif
#ifdef __cplusplus
# include <climits>
#else
# include <limits.h>
#endif
/** @name Error codes
* Error codes being returned by almost all functions.
*/
/** @{ */
/** Memory could not be allocated. */
#define UTF8PROC_ERROR_NOMEM -1
/** The given string is too long to be processed. */
#define UTF8PROC_ERROR_OVERFLOW -2
/** The given string is not a legal UTF-8 string. */
#define UTF8PROC_ERROR_INVALIDUTF8 -3
/** The @ref UTF8PROC_REJECTNA flag was set and an unassigned codepoint was found. */
#define UTF8PROC_ERROR_NOTASSIGNED -4
/** Invalid options have been used. */
#define UTF8PROC_ERROR_INVALIDOPTS -5
/** @} */
#define UTF8PROC_cont(ch) (((ch) & 0xc0) == 0x80)
/**
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
* The maximum number of bytes read is `strlen`, unless `strlen` is
* negative (in which case up to 4 bytes are read).
*
* If a valid codepoint could be read, it is stored in the variable
* pointed to by `codepoint_ref`, otherwise that variable will be set to -1.
* In case of success, the number of bytes read is returned; otherwise, a
* negative error code is returned.
*/
static inline utf8proc_ssize_t utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) {
utf8proc_uint32_t uc;
const utf8proc_uint8_t *end;
*dst = -1;
if (!strlen) return 0;
end = str + ((strlen < 0) ? 4 : strlen);
uc = *str++;
if (uc < 0x80) {
*dst = uc;
return 1;
}
// Must be between 0xc2 and 0xf4 inclusive to be valid
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
if (uc < 0xe0) { // 2-byte sequence
// Must have valid continuation character
if (!UTF8PROC_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
*dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
return 2;
}
if (uc < 0xf0) { // 3-byte sequence
if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
return UTF8PROC_ERROR_INVALIDUTF8;
// Check for surrogate chars
if (uc == 0xed && *str > 0x9f)
return UTF8PROC_ERROR_INVALIDUTF8;
uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
if (uc < 0x800)
return UTF8PROC_ERROR_INVALIDUTF8;
*dst = uc;
return 3;
}
// 4-byte sequence
// Must have 3 valid continuation characters
if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
return UTF8PROC_ERROR_INVALIDUTF8;
// Make sure in correct range (0x10000 - 0x10ffff)
if (uc == 0xf0) {
if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
} else if (uc == 0xf4) {
if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
}
*dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
return 4;
}
/**
* Encodes the codepoint as an UTF-8 string in the byte array pointed
* to by `dst`. This array must be at least 4 bytes long.
*
* In case of success the number of bytes written is returned, and
* otherwise 0 is returned.
*
* This function does not check whether `codepoint` is valid Unicode.
*/
static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
if (uc < 0x00) {
return 0;
} else if (uc < 0x80) {
dst[0] = uc;
return 1;
} else if (uc < 0x800) {
dst[0] = 0xC0 + (uc >> 6);
dst[1] = 0x80 + (uc & 0x3F);
return 2;
// Note: we allow encoding 0xd800-0xdfff here, so as not to change
// the API, however, these are actually invalid in UTF-8
} else if (uc < 0x10000) {
dst[0] = 0xE0 + (uc >> 12);
dst[1] = 0x80 + ((uc >> 6) & 0x3F);
dst[2] = 0x80 + (uc & 0x3F);
return 3;
} else if (uc < 0x110000) {
dst[0] = 0xF0 + (uc >> 18);
dst[1] = 0x80 + ((uc >> 12) & 0x3F);
dst[2] = 0x80 + ((uc >> 6) & 0x3F);
dst[3] = 0x80 + (uc & 0x3F);
return 4;
} else return 0;
}
#ifdef __cplusplus
#include <iterator>
#include <string>
class UTF8Iterator
{
std::string::const_iterator m_it;
public:
using iterator_category = std::forward_iterator_tag;
using value_type = uint32_t;
using difference_type = std::ptrdiff_t;
using pointer = uint32_t*;
using reference = uint32_t&;
UTF8Iterator(const std::string::const_iterator& it) : m_it(it) {}
UTF8Iterator& operator+=(size_t v)
{
for (size_t i=0 ; i<v ; ++i)
{
utf8proc_int32_t dummy;
utf8proc_ssize_t sz = utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &dummy);
#ifndef NDEBUG
if (*m_it == '\0')
{
fprintf(stderr, "ERROR! UTF8-iterator null-term fail\n");
abort();
}
else if (sz > 0)
m_it += sz;
else
{
fprintf(stderr, "ERROR! UTF8Iterator character fail");
abort();
}
#else
if (sz > 0)
m_it += sz;
#endif
}
return *this;
}
UTF8Iterator& operator++()
{
return this->operator+=(1);
}
UTF8Iterator operator+(size_t v) const
{
UTF8Iterator ret(m_it);
ret += v;
return ret;
}
uint32_t operator*() const
{
utf8proc_int32_t ret;
utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &ret);
return ret;
}
std::string::const_iterator iter() const {return m_it;}
size_t countTo(std::string::const_iterator end) const
{
UTF8Iterator it(m_it);
size_t ret = 0;
while (it.iter() < end)
{
++ret;
++it;
}
return ret;
}
};
#endif
#endif

View File

@ -5,7 +5,7 @@
namespace kabufuda {
AsyncIO::AsyncIO(SystemStringView filename, bool truncate) { m_fd = fopen(filename.data(), truncate ? "rw" : "ra"); }
AsyncIO::AsyncIO(std::string_view filename, bool truncate) { m_fd = fopen(filename.data(), truncate ? "rw" : "ra"); }
AsyncIO::~AsyncIO() {
if (*this) {

View File

@ -6,7 +6,7 @@
namespace kabufuda {
AsyncIO::AsyncIO(SystemStringView filename, bool truncate) {
AsyncIO::AsyncIO(std::string_view filename, bool truncate) {
m_fd = open(filename.data(), O_RDWR | O_CREAT | (truncate ? O_TRUNC : 0), 0644);
}

View File

@ -15,7 +15,7 @@ static void ResetOverlapped(OVERLAPPED& aio, DWORD offset = 0) {
aio.OffsetHigh = 0;
}
AsyncIO::AsyncIO(SystemStringView filename, bool truncate) {
AsyncIO::AsyncIO(std::string_view filename, bool truncate) {
#if WINDOWS_STORE
CREATEFILE2_EXTENDED_PARAMETERS parms = {};
parms.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS);
@ -24,7 +24,8 @@ AsyncIO::AsyncIO(SystemStringView filename, bool truncate) {
m_fh = CreateFile2(filename.data(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
truncate ? CREATE_ALWAYS : OPEN_ALWAYS, &parms);
#else
m_fh = CreateFileW(filename.data(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr,
WStringConv wfilename(filename);
m_fh = CreateFileW(wfilename.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr,
truncate ? CREATE_ALWAYS : OPEN_ALWAYS, FILE_FLAG_OVERLAPPED | FILE_ATTRIBUTE_NORMAL, nullptr);
#endif
}

View File

@ -832,7 +832,7 @@ void Card::format(ECardSlot id, ECardSize size, EEncoding encoding) {
}
}
ProbeResults Card::probeCardFile(SystemStringView filename) {
ProbeResults Card::probeCardFile(std::string_view filename) {
Sstat stat;
if (Stat(filename.data(), &stat) || !S_ISREG(stat.st_mode))
return {ECardResult::NOCARD, 0, 0};
@ -866,7 +866,7 @@ void Card::commit() {
}
}
bool Card::open(SystemStringView filepath) {
bool Card::open(std::string_view filepath) {
m_opened = false;
m_filename = filepath;
m_fileHandle = AsyncIO(m_filename);

View File

@ -1,39 +0,0 @@
#include "kabufuda/WideStringConvert.hpp"
#include <cstdio>
#include <utf8proc.h>
namespace kabufuda {
std::string WideToUTF8(std::wstring_view src) {
std::string retval;
retval.reserve(src.length());
for (wchar_t ch : src) {
utf8proc_uint8_t mb[4];
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
if (c < 0) {
fprintf(stderr, "invalid UTF-8 character while encoding");
return retval;
}
retval.append(reinterpret_cast<char*>(mb), c);
}
return retval;
}
std::wstring UTF8ToWide(std::string_view src) {
std::wstring retval;
retval.reserve(src.length());
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.data());
while (*buf) {
utf8proc_int32_t wc;
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
if (len < 0) {
fprintf(stderr, "invalid UTF-8 character while decoding");
return retval;
}
buf += len;
retval += wchar_t(wc);
}
return retval;
}
} // namespace kabufuda

View File

@ -3,7 +3,7 @@
int main() {
kabufuda::Card mc{"GM8E", "01"};
mc.open(_SYS_STR("test.USA.raw"));
mc.open("test.USA.raw");
mc.format(kabufuda::ECardSlot::SlotA, kabufuda::ECardSize::Card2043Mb);
uint64_t a = 0;
mc.getSerial(a);