Hook `IsDBCSLeadByte`; cleanup & improvements

This commit is contained in:
Luke Street 2023-09-08 16:55:56 -04:00
parent 9fe2bd4088
commit 808280cb10
3 changed files with 160 additions and 102 deletions

59
Cargo.lock generated
View File

@ -8,18 +8,18 @@ version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.33" version = "0.8.33"
@ -27,22 +27,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"packed_simd",
] ]
[[package]] [[package]]
name = "itertools" name = "libm"
version = "0.11.0" version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
dependencies = [
"either",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "memexec" name = "memexec"
@ -50,15 +42,40 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c" checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c"
[[package]]
name = "num-traits"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "packed_simd"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
dependencies = [
"cfg-if",
"num-traits",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "sjiswrap" name = "sjiswrap"
version = "1.0.0" version = "1.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"encoding_rs", "encoding_rs",
"itertools",
"lazy_static",
"memexec", "memexec",
"rustc-hash",
"windows", "windows",
] ]

View File

@ -3,7 +3,7 @@ name = "sjiswrap"
description = "UTF-8 to Shift JIS wrapper for old compilers." description = "UTF-8 to Shift JIS wrapper for old compilers."
authors = ["Luke Street <luke@street.dev>"] authors = ["Luke Street <luke@street.dev>"]
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
version = "1.0.0" version = "1.1.0"
edition = "2021" edition = "2021"
publish = false publish = false
repository = "https://github.com/encounter/sjiswrap" repository = "https://github.com/encounter/sjiswrap"
@ -22,15 +22,15 @@ debug = []
[dependencies] [dependencies]
anyhow = "1.0.72" anyhow = "1.0.72"
encoding_rs = "0.8.32" encoding_rs = { version = "0.8.32", features = ["simd-accel", "fast-kanji-encode"] }
itertools = "0.11.0"
lazy_static = "1.4.0"
memexec = { version = "0.2.0", features = ["hook"] } memexec = { version = "0.2.0", features = ["hook"] }
rustc-hash = "1.1.0"
[dependencies.windows] [dependencies.windows]
version = "0.48.0" version = "0.48.0"
features = [ features = [
"Win32_Foundation", "Win32_Foundation",
"Win32_Globalization",
"Win32_Security", "Win32_Security",
"Win32_Storage_FileSystem", "Win32_Storage_FileSystem",
"Win32_System_Environment", "Win32_System_Environment",

View File

@ -2,19 +2,19 @@
use std::{ use std::{
borrow::Cow, borrow::Cow,
cmp::min, cmp::min,
collections::HashMap, collections::{hash_map::Entry, HashMap},
ffi::{c_char, c_void, CStr, CString, OsString}, ffi::{c_char, c_void, CStr, CString, OsStr, OsString},
fs::File, fs::File,
io::Read, io::Read,
iter::{Cloned, Peekable}, iter::{Cloned, Peekable},
path::PathBuf, mem::MaybeUninit,
path::{Path, PathBuf},
process::exit, process::exit,
sync::Mutex,
}; };
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use encoding_rs::SHIFT_JIS; use encoding_rs::SHIFT_JIS;
use lazy_static::lazy_static; use rustc_hash::FxHashMap;
use windows::{ use windows::{
core::{PCSTR, PCWSTR}, core::{PCSTR, PCWSTR},
Win32::{ Win32::{
@ -33,17 +33,20 @@ use windows::{
}; };
/// Whether to hook and encode a file. /// Whether to hook and encode a file.
fn is_text_file(path: &str) -> bool { fn is_text_file(path: &Path) -> bool {
path.ends_with(".c") let Some(ext) = path.extension() else {
|| path.ends_with(".cc") return false;
|| path.ends_with(".cp") };
|| path.ends_with(".cpp") ext == OsStr::new("c")
|| path.ends_with(".cxx") || ext == OsStr::new("cc")
|| path.ends_with(".h") || ext == OsStr::new("cp")
|| path.ends_with(".hh") || ext == OsStr::new("cpp")
|| path.ends_with(".hp") || ext == OsStr::new("cxx")
|| path.ends_with(".hpp") || ext == OsStr::new("h")
|| path.ends_with(".hxx") || ext == OsStr::new("hh")
|| ext == OsStr::new("hp")
|| ext == OsStr::new("hpp")
|| ext == OsStr::new("hxx")
} }
macro_rules! debug_println { macro_rules! debug_println {
@ -65,6 +68,8 @@ fn main() -> Result<()> {
exit(1); exit(1);
} }
unsafe { GLOBAL_STATE = MaybeUninit::new(GlobalState::default()) };
let path = PathBuf::from(&args[1]); let path = PathBuf::from(&args[1]);
let parent = CString::new( let parent = CString::new(
path.parent() path.parent()
@ -95,13 +100,14 @@ fn main() -> Result<()> {
hooks.insert("kernel32.dll!CloseHandle".into(), hook_CloseHandle as *const c_void); hooks.insert("kernel32.dll!CloseHandle".into(), hook_CloseHandle as *const c_void);
hooks.insert("kernel32.dll!ReadFile".into(), hook_ReadFile as *const c_void); hooks.insert("kernel32.dll!ReadFile".into(), hook_ReadFile as *const c_void);
hooks.insert("kernel32.dll!SetFilePointer".into(), hook_SetFilePointer as *const c_void); hooks.insert("kernel32.dll!SetFilePointer".into(), hook_SetFilePointer as *const c_void);
hooks.insert("kernel32.dll!IsDBCSLeadByte".into(), hook_IsDBCSLeadByte as *const c_void);
unsafe { memexec::memexec_exe_with_hooks(&buf, &hooks) }.expect("Failed to execute"); unsafe { memexec::memexec_exe_with_hooks(&buf, &hooks) }.expect("Failed to execute");
Ok(()) Ok(())
} }
/// File that has been read into memory and encoded. /// File that has been read into memory and encoded.
struct FileHandle { struct FileHandle {
data: Vec<u8>, path: PathBuf,
pos: u64, pos: u64,
} }
@ -109,17 +115,24 @@ struct FileHandle {
#[derive(Default)] #[derive(Default)]
struct GlobalState { struct GlobalState {
cmdline: Option<CString>, cmdline: Option<CString>,
file_handles: HashMap<isize, FileHandle>, encoded_files: FxHashMap<PathBuf, Vec<u8>>,
file_handles: FxHashMap<isize, FileHandle>,
} }
lazy_static! { impl GlobalState {
static ref GLOBAL_STATE: Mutex<GlobalState> = Default::default(); fn file_by_handle(&mut self, handle: HANDLE) -> Option<(&mut FileHandle, &[u8])> {
self.file_handles
.get_mut(&handle.0)
.and_then(|file| self.encoded_files.get(&file.path).map(|data| (file, data.as_slice())))
}
} }
static mut GLOBAL_STATE: MaybeUninit<GlobalState> = MaybeUninit::uninit();
/// `GetCommandLineA` hook. Skips our own executable name and replaces the subprocess path with an absolute path. /// `GetCommandLineA` hook. Skips our own executable name and replaces the subprocess path with an absolute path.
extern "stdcall" fn hook_GetCommandLineA() -> PCSTR { extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(str) = &guard.cmdline { if let Some(str) = &state.cmdline {
return PCSTR(str.as_ptr() as *const u8); return PCSTR(str.as_ptr() as *const u8);
} }
@ -179,8 +192,8 @@ extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
cmdline.extend(iter); cmdline.extend(iter);
} }
cmdline.push(0); cmdline.push(0);
guard.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) }); state.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) });
PCSTR(guard.cmdline.as_ref().unwrap().as_ptr() as *const u8) PCSTR(state.cmdline.as_ref().unwrap().as_ptr() as *const u8)
} }
/// `GetCommandLineW` hook. Currently unsupported. /// `GetCommandLineW` hook. Currently unsupported.
@ -188,6 +201,52 @@ extern "stdcall" fn hook_GetCommandLineW() -> PCSTR {
panic!("GetCommandLineW() is not supported"); panic!("GetCommandLineW() is not supported");
} }
/// Read a file into memory and encode it as Shift JIS.
///
/// `handle` is always registered in the handle table with a position of 0. If `path`
/// is not yet present in the encoded-file cache, the whole file is read through
/// `handle` and stored under `path`:
///
/// * valid UTF-8 that needed re-encoding is stored as Shift JIS bytes;
/// * valid UTF-8 that encodes to itself is stored unchanged;
/// * bytes that are not valid UTF-8 (e.g. a file already saved as Shift JIS) are
///   stored unchanged rather than being reinterpreted as UTF-8.
///
/// If the file is too large or cannot be read completely, nothing is cached, so
/// `GlobalState::file_by_handle` will not resolve this handle.
fn encode_file(handle: HANDLE, path: &Path) {
    // SAFETY: `main` writes an initialized `GlobalState` into `GLOBAL_STATE` before the
    // hooked executable is started.
    // NOTE(review): assumes hooks are never entered concurrently -- TODO confirm.
    let state = unsafe { GLOBAL_STATE.assume_init_mut() };
    state.file_handles.insert(handle.0, FileHandle { path: path.to_path_buf(), pos: 0 });
    let Entry::Vacant(entry) = state.encoded_files.entry(path.to_path_buf()) else {
        debug_println!("File already cached: {}", path.display());
        return;
    };

    // Combine the low and high halves of the size into one 64-bit value.
    let mut filesize_high = 0u32;
    let mut filesize = unsafe { GetFileSize(handle, Some(&mut filesize_high)) } as u64;
    filesize |= (filesize_high as u64) << 32;
    // Also rejects the case where the low half is 0xFFFFFFFF and the high half is 0.
    if filesize >= u32::MAX as u64 {
        return;
    }

    let mut data = vec![0u8; filesize as usize];
    let mut bytes_read = 0u32;
    // SAFETY: the pointer and length both describe `data`, which outlives the call.
    let read_ok = unsafe {
        ReadFile(
            handle,
            Some(data.as_mut_ptr() as *mut c_void),
            data.len() as u32,
            Some(&mut bytes_read),
            None,
        )
    }
    .as_bool();
    if !read_ok || bytes_read != filesize as u32 {
        // NOTE(review): a partial read may have advanced the real file pointer of
        // `handle`; confirm whether callers need it rewound on this path.
        return;
    }

    // Validate instead of assuming UTF-8: building a `&str` from arbitrary file bytes
    // without checking is undefined behaviour.
    let reencoded = match std::str::from_utf8(&data) {
        Ok(text) => {
            let (encoded, _, _) = SHIFT_JIS.encode(text);
            match encoded {
                // No modifications were made, use the original data
                Cow::Borrowed(_) => None,
                Cow::Owned(bytes) => Some(bytes),
            }
        }
        Err(_) => {
            debug_println!("File is not valid UTF-8, using as-is: {}", path.display());
            None
        }
    };
    entry.insert(reencoded.unwrap_or(data));
}
/// `CreateFileA` hook. If it's a text file, read it into memory and encode it as Shift-JIS. /// `CreateFileA` hook. If it's a text file, read it into memory and encode it as Shift-JIS.
extern "stdcall" fn hook_CreateFileA( extern "stdcall" fn hook_CreateFileA(
lpFileName: PCSTR, lpFileName: PCSTR,
@ -212,44 +271,20 @@ extern "stdcall" fn hook_CreateFileA(
.unwrap_or(INVALID_HANDLE_VALUE); .unwrap_or(INVALID_HANDLE_VALUE);
let err = unsafe { GetLastError() }; let err = unsafe { GetLastError() };
let path = unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }.to_string_lossy(); let path = PathBuf::from(
unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }
.to_str()
.expect("CreateFileA(): Path is not valid UTF-8"),
);
if !ret.is_invalid() && dwDesiredAccess == GENERIC_READ && is_text_file(&path) { if !ret.is_invalid() && dwDesiredAccess == GENERIC_READ && is_text_file(&path) {
let mut filesize_high = 0u32; encode_file(ret, &path);
let mut filesize = unsafe { GetFileSize(ret, Some(&mut filesize_high)) } as u64;
filesize |= (filesize_high as u64) << 32;
if filesize < u32::MAX as u64 {
let mut data = vec![0u8; filesize as usize];
let mut bytes_read = 0u32;
if unsafe {
ReadFile(
ret,
Some(data.as_mut_ptr() as *mut c_void),
filesize as u32,
Some(&mut bytes_read),
None,
)
} }
.as_bool() debug_println!(
&& bytes_read == filesize as u32 "CreateFileA({}, {:#X}) = {:#X}",
{ path.display(),
if let Ok(str) = std::str::from_utf8(&data) { dwDesiredAccess.0,
let (encoded, _, _) = SHIFT_JIS.encode(str); ret.0 as u32
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); );
match encoded {
Cow::Borrowed(_) => {
// No modifications were made, use the original data
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
}
Cow::Owned(data) => {
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
}
}
}
}
}
}
debug_println!("CreateFileA({}, {:#X}) = {:#X}", path, dwDesiredAccess.0, ret.0 as u32);
unsafe { SetLastError(err) }; unsafe { SetLastError(err) };
ret ret
} }
@ -270,10 +305,10 @@ extern "stdcall" fn hook_CreateFileW(
/// `GetFileSize` hook. If the file was read into memory, return that size instead. /// `GetFileSize` hook. If the file was read into memory, return that size instead.
extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> u32 { extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> u32 {
if !hFile.is_invalid() { if !hFile.is_invalid() {
let guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(file) = guard.file_handles.get(&hFile.0) { if let Some((_handle, data)) = state.file_by_handle(hFile) {
debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, file.data.len() as u32); debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, data.len() as u32);
return file.data.len() as u32; return data.len() as u32;
} }
} }
@ -285,9 +320,12 @@ extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) ->
/// `CloseHandle` hook. If the file was read into memory, free it. /// `CloseHandle` hook. If the file was read into memory, free it.
extern "stdcall" fn hook_CloseHandle(hObject: HANDLE) -> BOOL { extern "stdcall" fn hook_CloseHandle(hObject: HANDLE) -> BOOL {
if !hObject.is_invalid() { if !hObject.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if guard.file_handles.remove(&hObject.0).is_some() { if let Some(handle) = state.file_handles.remove(&hObject.0) {
debug_println!("File handle removed: {:#X}", hObject.0); let _ = handle;
debug_println!("File handle removed: {:#X} ({})", hObject.0, handle.path.display());
// Purposefully leave the file data itself in the cache.
// mwcceppc in particular will read the same file multiple times.
} }
} }
@ -305,20 +343,20 @@ extern "stdcall" fn hook_ReadFile(
lpOverlapped: *mut OVERLAPPED, lpOverlapped: *mut OVERLAPPED,
) -> BOOL { ) -> BOOL {
if !hFile.is_invalid() { if !hFile.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(file) = guard.file_handles.get_mut(&hFile.0) { if let Some((handle, data)) = state.file_by_handle(hFile) {
let count = min( let count = min(
nNumberOfBytesToRead, nNumberOfBytesToRead,
u32::try_from(file.data.len() as u64 - file.pos).unwrap_or(u32::MAX), u32::try_from(data.len() as u64 - handle.pos).unwrap_or(u32::MAX),
); );
unsafe { unsafe {
std::ptr::copy_nonoverlapping( std::ptr::copy_nonoverlapping(
file.data.as_ptr().offset(file.pos as isize), data.as_ptr().offset(handle.pos as isize),
lpBuffer as *mut u8, lpBuffer as *mut u8,
count as usize, count as usize,
); );
} }
file.pos += count as u64; handle.pos += count as u64;
if !lpNumberOfBytesRead.is_null() { if !lpNumberOfBytesRead.is_null() {
unsafe { *lpNumberOfBytesRead = count }; unsafe { *lpNumberOfBytesRead = count };
} }
@ -362,22 +400,22 @@ extern "stdcall" fn hook_SetFilePointer(
dwMoveMethod: SET_FILE_POINTER_MOVE_METHOD, dwMoveMethod: SET_FILE_POINTER_MOVE_METHOD,
) -> u32 { ) -> u32 {
if !hFile.is_invalid() { if !hFile.is_invalid() {
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); let state = unsafe { GLOBAL_STATE.assume_init_mut() };
if let Some(file) = guard.file_handles.get_mut(&hFile.0) { if let Some((handle, data)) = state.file_by_handle(hFile) {
let distance_to_move_high = let distance_to_move_high =
if lpDistanceToMoveHigh.is_null() { 0 } else { unsafe { *lpDistanceToMoveHigh } }; if lpDistanceToMoveHigh.is_null() { 0 } else { unsafe { *lpDistanceToMoveHigh } };
let distance_to_move = lDistanceToMove as i64 | (distance_to_move_high as i64) << 32; let distance_to_move = lDistanceToMove as i64 | (distance_to_move_high as i64) << 32;
let file_size = file.data.len() as u64; let file_size = data.len() as u64;
let pos = min( let pos = min(
match dwMoveMethod { match dwMoveMethod {
FILE_BEGIN => distance_to_move as u64, FILE_BEGIN => distance_to_move as u64,
FILE_CURRENT => file.pos.saturating_add_signed(distance_to_move), FILE_CURRENT => handle.pos.saturating_add_signed(distance_to_move),
FILE_END => file_size.saturating_add_signed(distance_to_move), FILE_END => file_size.saturating_add_signed(distance_to_move),
_ => panic!("SetFilePointer(): Unsupported move method {:#X}", dwMoveMethod.0), _ => panic!("SetFilePointer(): Unsupported move method {:#X}", dwMoveMethod.0),
}, },
file_size, file_size,
); );
file.pos = pos; handle.pos = pos;
debug_println!( debug_println!(
"OVERRIDE SetFilePointer({:#X}, {:#X}, {:?}, {}) = {:#X}", "OVERRIDE SetFilePointer({:#X}, {:#X}, {:?}, {}) = {:#X}",
hFile.0, hFile.0,
@ -406,6 +444,9 @@ extern "stdcall" fn hook_SetFilePointer(
ret ret
} }
/// `IsDBCSLeadByte` hook. This normally uses the system codepage, override with Shift JIS behavior.
///
/// Returns a true `BOOL` only when `TestChar` is the first byte of a two-byte Shift JIS
/// sequence, i.e. it lies in `0x81..=0x9F` or `0xE0..=0xFC`. Other bytes with the high bit
/// set -- notably single-byte half-width katakana in `0xA1..=0xDF` -- are not lead bytes;
/// reporting them as such would make the caller treat the following byte as a trail byte.
extern "stdcall" fn hook_IsDBCSLeadByte(TestChar: u8) -> BOOL {
    matches!(TestChar, 0x81..=0x9F | 0xE0..=0xFC).into()
}
/// Get the absolute path of a file. /// Get the absolute path of a file.
fn get_full_path(path: &CStr) -> Result<CString> { fn get_full_path(path: &CStr) -> Result<CString> {
let mut buf = [0u8; 4096]; let mut buf = [0u8; 4096];