Hook `IsDBCSLeadByte`; cleanup & improvements

This commit is contained in:
Luke Street 2023-09-08 16:55:56 -04:00
parent 9fe2bd4088
commit 808280cb10
3 changed files with 160 additions and 102 deletions

59
Cargo.lock generated
View File

@ -8,18 +8,18 @@ version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "encoding_rs"
version = "0.8.33"
@ -27,22 +27,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
dependencies = [
"cfg-if",
"packed_simd",
]
[[package]]
name = "itertools"
version = "0.11.0"
name = "libm"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
[[package]]
name = "memexec"
@ -50,15 +42,40 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c"
[[package]]
name = "num-traits"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "packed_simd"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
dependencies = [
"cfg-if",
"num-traits",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "sjiswrap"
version = "1.0.0"
version = "1.1.0"
dependencies = [
"anyhow",
"encoding_rs",
"itertools",
"lazy_static",
"memexec",
"rustc-hash",
"windows",
]

View File

@ -3,7 +3,7 @@ name = "sjiswrap"
description = "UTF-8 to Shift JIS wrapper for old compilers."
authors = ["Luke Street <luke@street.dev>"]
license = "MIT OR Apache-2.0"
version = "1.0.0"
version = "1.1.0"
edition = "2021"
publish = false
repository = "https://github.com/encounter/sjiswrap"
@ -22,15 +22,15 @@ debug = []
[dependencies]
anyhow = "1.0.72"
encoding_rs = "0.8.32"
itertools = "0.11.0"
lazy_static = "1.4.0"
encoding_rs = { version = "0.8.32", features = ["simd-accel", "fast-kanji-encode"] }
memexec = { version = "0.2.0", features = ["hook"] }
rustc-hash = "1.1.0"
[dependencies.windows]
version = "0.48.0"
features = [
"Win32_Foundation",
"Win32_Globalization",
"Win32_Security",
"Win32_Storage_FileSystem",
"Win32_System_Environment",

View File

@ -2,19 +2,19 @@
use std::{
borrow::Cow,
cmp::min,
collections::HashMap,
ffi::{c_char, c_void, CStr, CString, OsString},
collections::{hash_map::Entry, HashMap},
ffi::{c_char, c_void, CStr, CString, OsStr, OsString},
fs::File,
io::Read,
iter::{Cloned, Peekable},
path::PathBuf,
mem::MaybeUninit,
path::{Path, PathBuf},
process::exit,
sync::Mutex,
};
use anyhow::{Context, Result};
use encoding_rs::SHIFT_JIS;
use lazy_static::lazy_static;
use rustc_hash::FxHashMap;
use windows::{
core::{PCSTR, PCWSTR},
Win32::{
@ -33,17 +33,20 @@ use windows::{
};
/// Whether to hook and encode a file.
///
/// Returns `true` when the path's extension is one of the C/C++ source or
/// header extensions listed below. The comparison is exact (case-sensitive);
/// paths without an extension, or with a non-Unicode extension, are rejected.
fn is_text_file(path: &Path) -> bool {
    let Some(ext) = path.extension() else {
        return false;
    };
    matches!(
        ext.to_str(),
        Some("c" | "cc" | "cp" | "cpp" | "cxx" | "h" | "hh" | "hp" | "hpp" | "hxx")
    )
}
macro_rules! debug_println {
@ -65,6 +68,8 @@ fn main() -> Result<()> {
exit(1);
}
unsafe { GLOBAL_STATE = MaybeUninit::new(GlobalState::default()) };
let path = PathBuf::from(&args[1]);
let parent = CString::new(
path.parent()
@ -95,13 +100,14 @@ fn main() -> Result<()> {
hooks.insert("kernel32.dll!CloseHandle".into(), hook_CloseHandle as *const c_void);
hooks.insert("kernel32.dll!ReadFile".into(), hook_ReadFile as *const c_void);
hooks.insert("kernel32.dll!SetFilePointer".into(), hook_SetFilePointer as *const c_void);
hooks.insert("kernel32.dll!IsDBCSLeadByte".into(), hook_IsDBCSLeadByte as *const c_void);
unsafe { memexec::memexec_exe_with_hooks(&buf, &hooks) }.expect("Failed to execute");
Ok(())
}
/// File that has been read into memory and encoded.
///
/// The encoded bytes themselves are stored once per path in
/// `GlobalState::encoded_files`; this struct only tracks per-handle state.
struct FileHandle {
    /// Path the handle was opened with; key into the encoded file cache.
    path: PathBuf,
    /// Current read position within the encoded data, in bytes.
    pos: u64,
}
@ -109,17 +115,24 @@ struct FileHandle {
#[derive(Default)]
struct GlobalState {
cmdline: Option<CString>,
file_handles: HashMap<isize, FileHandle>,
encoded_files: FxHashMap<PathBuf, Vec<u8>>,
file_handles: FxHashMap<isize, FileHandle>,
}
lazy_static! {
static ref GLOBAL_STATE: Mutex<GlobalState> = Default::default();
impl GlobalState {
fn file_by_handle(&mut self, handle: HANDLE) -> Option<(&mut FileHandle, &[u8])> {
self.file_handles
.get_mut(&handle.0)
.and_then(|file| self.encoded_files.get(&file.path).map(|data| (file, data.as_slice())))
}
}
static mut GLOBAL_STATE: MaybeUninit<GlobalState> = MaybeUninit::uninit();
/// `GetCommandLineA` hook. Skips our own executable name and replaces the subprocess path with an absolute path.
extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
if let Some(str) = &guard.cmdline {
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(str) = &state.cmdline {
return PCSTR(str.as_ptr() as *const u8);
}
@ -179,8 +192,8 @@ extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
cmdline.extend(iter);
}
cmdline.push(0);
guard.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) });
PCSTR(guard.cmdline.as_ref().unwrap().as_ptr() as *const u8)
state.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) });
PCSTR(state.cmdline.as_ref().unwrap().as_ptr() as *const u8)
}
/// `GetCommandLineW` hook. Currently unsupported.
@ -188,6 +201,52 @@ extern "stdcall" fn hook_GetCommandLineW() -> PCSTR {
panic!("GetCommandLineW() is not supported");
}
/// Read a file into memory and encode it as Shift JIS.
///
/// Registers `handle` in the global handle table (starting at position 0) and,
/// unless data for `path` is already cached, reads the whole file through
/// `handle` and caches the bytes for that path.
///
/// Nothing is cached when the reported size is `u32::MAX` or larger, or when
/// the read fails or comes up short; lookups for the handle then find no data.
///
/// Files that are not valid UTF-8 (for example, files that are already
/// Shift JIS) are cached unmodified rather than being re-encoded.
fn encode_file(handle: HANDLE, path: &Path) {
    // SAFETY: `main` initializes GLOBAL_STATE before the hooked executable runs.
    // NOTE(review): assumes hooks are never entered concurrently — confirm.
    let state = unsafe { GLOBAL_STATE.assume_init_mut() };
    state.file_handles.insert(handle.0, FileHandle { path: path.to_path_buf(), pos: 0 });
    let Entry::Vacant(entry) = state.encoded_files.entry(path.to_path_buf()) else {
        debug_println!("File already cached: {}", path.display());
        return;
    };

    // Combine the low and high halves of the file size.
    let mut filesize_high = 0u32;
    let mut filesize = unsafe { GetFileSize(handle, Some(&mut filesize_high)) } as u64;
    filesize |= (filesize_high as u64) << 32;
    if filesize >= u32::MAX as u64 {
        return;
    }

    // Read the whole file in a single call; the length fits in u32 per the check above.
    let mut data = vec![0u8; filesize as usize];
    let mut bytes_read = 0u32;
    let read_ok = unsafe {
        ReadFile(
            handle,
            Some(data.as_mut_ptr() as *mut c_void),
            data.len() as u32,
            Some(&mut bytes_read),
            None,
        )
    }
    .as_bool();
    if !read_ok || bytes_read != filesize as u32 {
        return;
    }

    // Validate before encoding: the file contents are arbitrary bytes, so
    // treating them as `str` without a check would be undefined behavior.
    let encoded = match std::str::from_utf8(&data) {
        Ok(text) => match SHIFT_JIS.encode(text) {
            (Cow::Owned(bytes), _, _) => Some(bytes),
            // No modifications were made, use the original data
            (Cow::Borrowed(_), _, _) => None,
        },
        Err(_) => {
            debug_println!("File is not valid UTF-8, using as-is: {}", path.display());
            None
        }
    };
    entry.insert(encoded.unwrap_or(data));
}
/// `CreateFileA` hook. If it's a text file, read it into memory and encode it as Shift-JIS.
extern "stdcall" fn hook_CreateFileA(
lpFileName: PCSTR,
@ -212,44 +271,20 @@ extern "stdcall" fn hook_CreateFileA(
.unwrap_or(INVALID_HANDLE_VALUE);
let err = unsafe { GetLastError() };
let path = unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }.to_string_lossy();
let path = PathBuf::from(
unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }
.to_str()
.expect("CreateFileA(): Path is not valid UTF-8"),
);
if !ret.is_invalid() && dwDesiredAccess == GENERIC_READ && is_text_file(&path) {
let mut filesize_high = 0u32;
let mut filesize = unsafe { GetFileSize(ret, Some(&mut filesize_high)) } as u64;
filesize |= (filesize_high as u64) << 32;
if filesize < u32::MAX as u64 {
let mut data = vec![0u8; filesize as usize];
let mut bytes_read = 0u32;
if unsafe {
ReadFile(
ret,
Some(data.as_mut_ptr() as *mut c_void),
filesize as u32,
Some(&mut bytes_read),
None,
)
}
.as_bool()
&& bytes_read == filesize as u32
{
if let Ok(str) = std::str::from_utf8(&data) {
let (encoded, _, _) = SHIFT_JIS.encode(str);
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
match encoded {
Cow::Borrowed(_) => {
// No modifications were made, use the original data
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
}
Cow::Owned(data) => {
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
}
}
}
}
}
encode_file(ret, &path);
}
debug_println!("CreateFileA({}, {:#X}) = {:#X}", path, dwDesiredAccess.0, ret.0 as u32);
debug_println!(
"CreateFileA({}, {:#X}) = {:#X}",
path.display(),
dwDesiredAccess.0,
ret.0 as u32
);
unsafe { SetLastError(err) };
ret
}
@ -270,10 +305,10 @@ extern "stdcall" fn hook_CreateFileW(
/// `GetFileSize` hook. If the file was read into memory, return that size instead.
extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> u32 {
if !hFile.is_invalid() {
let guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
if let Some(file) = guard.file_handles.get(&hFile.0) {
debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, file.data.len() as u32);
return file.data.len() as u32;
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some((_handle, data)) = state.file_by_handle(hFile) {
debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, data.len() as u32);
return data.len() as u32;
}
}
@ -285,9 +320,12 @@ extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) ->
/// `CloseHandle` hook. If the file was read into memory, free it.
extern "stdcall" fn hook_CloseHandle(hObject: HANDLE) -> BOOL {
if !hObject.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
if guard.file_handles.remove(&hObject.0).is_some() {
debug_println!("File handle removed: {:#X}", hObject.0);
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(handle) = state.file_handles.remove(&hObject.0) {
let _ = handle;
debug_println!("File handle removed: {:#X} ({})", hObject.0, handle.path.display());
// Purposefully leave the file data itself in the cache.
// mwcceppc in particular will read the same file multiple times.
}
}
@ -305,20 +343,20 @@ extern "stdcall" fn hook_ReadFile(
lpOverlapped: *mut OVERLAPPED,
) -> BOOL {
if !hFile.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
if let Some(file) = guard.file_handles.get_mut(&hFile.0) {
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some((handle, data)) = state.file_by_handle(hFile) {
let count = min(
nNumberOfBytesToRead,
u32::try_from(file.data.len() as u64 - file.pos).unwrap_or(u32::MAX),
u32::try_from(data.len() as u64 - handle.pos).unwrap_or(u32::MAX),
);
unsafe {
std::ptr::copy_nonoverlapping(
file.data.as_ptr().offset(file.pos as isize),
data.as_ptr().offset(handle.pos as isize),
lpBuffer as *mut u8,
count as usize,
);
}
file.pos += count as u64;
handle.pos += count as u64;
if !lpNumberOfBytesRead.is_null() {
unsafe { *lpNumberOfBytesRead = count };
}
@ -362,22 +400,22 @@ extern "stdcall" fn hook_SetFilePointer(
dwMoveMethod: SET_FILE_POINTER_MOVE_METHOD,
) -> u32 {
if !hFile.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
if let Some(file) = guard.file_handles.get_mut(&hFile.0) {
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some((handle, data)) = state.file_by_handle(hFile) {
let distance_to_move_high =
if lpDistanceToMoveHigh.is_null() { 0 } else { unsafe { *lpDistanceToMoveHigh } };
let distance_to_move = lDistanceToMove as i64 | (distance_to_move_high as i64) << 32;
let file_size = file.data.len() as u64;
let file_size = data.len() as u64;
let pos = min(
match dwMoveMethod {
FILE_BEGIN => distance_to_move as u64,
FILE_CURRENT => file.pos.saturating_add_signed(distance_to_move),
FILE_CURRENT => handle.pos.saturating_add_signed(distance_to_move),
FILE_END => file_size.saturating_add_signed(distance_to_move),
_ => panic!("SetFilePointer(): Unsupported move method {:#X}", dwMoveMethod.0),
},
file_size,
);
file.pos = pos;
handle.pos = pos;
debug_println!(
"OVERRIDE SetFilePointer({:#X}, {:#X}, {:?}, {}) = {:#X}",
hFile.0,
@ -406,6 +444,9 @@ extern "stdcall" fn hook_SetFilePointer(
ret
}
/// `IsDBCSLeadByte` hook. This normally uses the system codepage, override with Shift JIS behavior.
///
/// Shift JIS (code page 932) lead bytes are `0x81..=0x9F` and `0xE0..=0xFC`.
/// Other bytes with the high bit set, notably the single-byte half-width
/// katakana range `0xA1..=0xDF`, are not lead bytes and must return `FALSE`.
extern "stdcall" fn hook_IsDBCSLeadByte(TestChar: u8) -> BOOL {
    matches!(TestChar, 0x81..=0x9F | 0xE0..=0xFC).into()
}
/// Get the absolute path of a file.
fn get_full_path(path: &CStr) -> Result<CString> {
let mut buf = [0u8; 4096];