From 808280cb1063dd454c05a0768cf694fefdfc0dbd Mon Sep 17 00:00:00 2001 From: Luke Street Date: Fri, 8 Sep 2023 16:55:56 -0400 Subject: [PATCH] Hook `IsDBCSLeadByte`; cleanup & improvements --- Cargo.lock | 59 ++++++++++------ Cargo.toml | 8 +-- src/main.rs | 195 +++++++++++++++++++++++++++++++--------------------- 3 files changed, 160 insertions(+), 102 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ce9214..275394a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,18 +8,18 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - [[package]] name = "encoding_rs" version = "0.8.33" @@ -27,22 +27,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ "cfg-if", + "packed_simd", ] [[package]] -name = "itertools" -version = "0.11.0" +name = "libm" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "memexec" @@ -50,15 +42,40 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c" +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "packed_simd" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d" +dependencies = [ + "cfg-if", + "num-traits", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "sjiswrap" -version = "1.0.0" +version = "1.1.0" dependencies = [ "anyhow", "encoding_rs", - "itertools", - "lazy_static", "memexec", + "rustc-hash", "windows", ] diff --git a/Cargo.toml b/Cargo.toml index 11a7d84..80e89b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "sjiswrap" description = "UTF-8 to Shift JIS wrapper for old compilers." authors = ["Luke Street "] license = "MIT OR Apache-2.0" -version = "1.0.0" +version = "1.1.0" edition = "2021" publish = false repository = "https://github.com/encounter/sjiswrap" @@ -22,15 +22,15 @@ debug = [] [dependencies] anyhow = "1.0.72" -encoding_rs = "0.8.32" -itertools = "0.11.0" -lazy_static = "1.4.0" +encoding_rs = { version = "0.8.32", features = ["simd-accel", "fast-kanji-encode"] } memexec = { version = "0.2.0", features = ["hook"] } +rustc-hash = "1.1.0" [dependencies.windows] version = "0.48.0" features = [ "Win32_Foundation", + "Win32_Globalization", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Environment", diff --git a/src/main.rs b/src/main.rs index 3593a72..78bcfda 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,19 +2,19 @@ use std::{ borrow::Cow, cmp::min, - collections::HashMap, - ffi::{c_char, c_void, CStr, CString, OsString}, + collections::{hash_map::Entry, HashMap}, + ffi::{c_char, c_void, CStr, CString, OsStr, OsString}, fs::File, io::Read, iter::{Cloned, Peekable}, - path::PathBuf, + mem::MaybeUninit, + path::{Path, PathBuf}, process::exit, - sync::Mutex, }; use anyhow::{Context, Result}; use encoding_rs::SHIFT_JIS; -use lazy_static::lazy_static; +use rustc_hash::FxHashMap; use windows::{ core::{PCSTR, PCWSTR}, Win32::{ @@ -33,17 +33,20 @@ use windows::{ }; /// Whether to hook and encode a file. -fn is_text_file(path: &str) -> bool { - path.ends_with(".c") - || path.ends_with(".cc") - || path.ends_with(".cp") - || path.ends_with(".cpp") - || path.ends_with(".cxx") - || path.ends_with(".h") - || path.ends_with(".hh") - || path.ends_with(".hp") - || path.ends_with(".hpp") - || path.ends_with(".hxx") +fn is_text_file(path: &Path) -> bool { + let Some(ext) = path.extension() else { + return false; + }; + ext == OsStr::new("c") + || ext == OsStr::new("cc") + || ext == OsStr::new("cp") + || ext == OsStr::new("cpp") + || ext == OsStr::new("cxx") + || ext == OsStr::new("h") + || ext == OsStr::new("hh") + || ext == OsStr::new("hp") + || ext == OsStr::new("hpp") + || ext == OsStr::new("hxx") } macro_rules! debug_println { @@ -65,6 +68,8 @@ fn main() -> Result<()> { exit(1); } + unsafe { GLOBAL_STATE = MaybeUninit::new(GlobalState::default()) }; + let path = PathBuf::from(&args[1]); let parent = CString::new( path.parent() @@ -95,13 +100,14 @@ fn main() -> Result<()> { hooks.insert("kernel32.dll!CloseHandle".into(), hook_CloseHandle as *const c_void); hooks.insert("kernel32.dll!ReadFile".into(), hook_ReadFile as *const c_void); hooks.insert("kernel32.dll!SetFilePointer".into(), hook_SetFilePointer as *const c_void); + hooks.insert("kernel32.dll!IsDBCSLeadByte".into(), hook_IsDBCSLeadByte as *const c_void); unsafe { memexec::memexec_exe_with_hooks(&buf, &hooks) }.expect("Failed to execute"); Ok(()) } /// File that has been read into memory and encoded. struct FileHandle { - data: Vec, + path: PathBuf, pos: u64, } @@ -109,17 +115,24 @@ struct FileHandle { #[derive(Default)] struct GlobalState { cmdline: Option, - file_handles: HashMap, + encoded_files: FxHashMap>, + file_handles: FxHashMap, } -lazy_static! { - static ref GLOBAL_STATE: Mutex = Default::default(); +impl GlobalState { + fn file_by_handle(&mut self, handle: HANDLE) -> Option<(&mut FileHandle, &[u8])> { + self.file_handles + .get_mut(&handle.0) + .and_then(|file| self.encoded_files.get(&file.path).map(|data| (file, data.as_slice()))) + } } +static mut GLOBAL_STATE: MaybeUninit = MaybeUninit::uninit(); + /// `GetCommandLineA` hook. Skips our own executable name and replaces the subprocess path with an absolute path. extern "stdcall" fn hook_GetCommandLineA() -> PCSTR { - let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - if let Some(str) = &guard.cmdline { + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + if let Some(str) = &state.cmdline { return PCSTR(str.as_ptr() as *const u8); } @@ -179,8 +192,8 @@ extern "stdcall" fn hook_GetCommandLineA() -> PCSTR { cmdline.extend(iter); } cmdline.push(0); - guard.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) }); - PCSTR(guard.cmdline.as_ref().unwrap().as_ptr() as *const u8) + state.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) }); + PCSTR(state.cmdline.as_ref().unwrap().as_ptr() as *const u8) } /// `GetCommandLineW` hook. Currently unsupported. @@ -188,6 +201,52 @@ extern "stdcall" fn hook_GetCommandLineW() -> PCSTR { panic!("GetCommandLineW() is not supported"); } +/// Read a file into memory and encode it as Shift JIS. +fn encode_file(handle: HANDLE, path: &Path) { + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + state.file_handles.insert(handle.0, FileHandle { path: path.to_path_buf(), pos: 0 }); + let Entry::Vacant(entry) = state.encoded_files.entry(path.to_path_buf()) else { + debug_println!("File already cached: {}", path.display()); + return; + }; + + let mut filesize_high = 0u32; + let mut filesize = unsafe { GetFileSize(handle, Some(&mut filesize_high)) } as u64; + filesize |= (filesize_high as u64) << 32; + if filesize >= u32::MAX as u64 { + return; + } + + let mut data = vec![0u8; filesize as usize]; + let mut bytes_read = 0u32; + if !unsafe { + ReadFile( + handle, + Some(data.as_mut_ptr() as *mut c_void), + data.len() as u32, + Some(&mut bytes_read), + None, + ) + } + .as_bool() + || bytes_read != filesize as u32 + { + return; + } + + let str = unsafe { std::str::from_utf8_unchecked(&data) }; + let (encoded, _, _) = SHIFT_JIS.encode(str); + match encoded { + Cow::Borrowed(_) => { + // No modifications were made, use the original data + entry.insert(data); + } + Cow::Owned(data) => { + entry.insert(data); + } + } +} + /// `CreateFileA` hook. If it's a text file, read it into memory and encode it as Shift-JIS. extern "stdcall" fn hook_CreateFileA( lpFileName: PCSTR, @@ -212,44 +271,20 @@ extern "stdcall" fn hook_CreateFileA( .unwrap_or(INVALID_HANDLE_VALUE); let err = unsafe { GetLastError() }; - let path = unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }.to_string_lossy(); + let path = PathBuf::from( + unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) } + .to_str() + .expect("CreateFileA(): Path is not valid UTF-8"), + ); if !ret.is_invalid() && dwDesiredAccess == GENERIC_READ && is_text_file(&path) { - let mut filesize_high = 0u32; - let mut filesize = unsafe { GetFileSize(ret, Some(&mut filesize_high)) } as u64; - filesize |= (filesize_high as u64) << 32; - - if filesize < u32::MAX as u64 { - let mut data = vec![0u8; filesize as usize]; - let mut bytes_read = 0u32; - if unsafe { - ReadFile( - ret, - Some(data.as_mut_ptr() as *mut c_void), - filesize as u32, - Some(&mut bytes_read), - None, - ) - } - .as_bool() - && bytes_read == filesize as u32 - { - if let Ok(str) = std::str::from_utf8(&data) { - let (encoded, _, _) = SHIFT_JIS.encode(str); - let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - match encoded { - Cow::Borrowed(_) => { - // No modifications were made, use the original data - guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 }); - } - Cow::Owned(data) => { - guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 }); - } - } - } - } - } + encode_file(ret, &path); } - debug_println!("CreateFileA({}, {:#X}) = {:#X}", path, dwDesiredAccess.0, ret.0 as u32); + debug_println!( + "CreateFileA({}, {:#X}) = {:#X}", + path.display(), + dwDesiredAccess.0, + ret.0 as u32 + ); unsafe { SetLastError(err) }; ret } @@ -270,10 +305,10 @@ extern "stdcall" fn hook_CreateFileW( /// `GetFileSize` hook. If the file was read into memory, return that size instead. extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> u32 { if !hFile.is_invalid() { - let guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - if let Some(file) = guard.file_handles.get(&hFile.0) { - debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, file.data.len() as u32); - return file.data.len() as u32; + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + if let Some((_handle, data)) = state.file_by_handle(hFile) { + debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, data.len() as u32); + return data.len() as u32; } } @@ -285,9 +320,12 @@ extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> /// `CloseHandle` hook. If the file was read into memory, free it. extern "stdcall" fn hook_CloseHandle(hObject: HANDLE) -> BOOL { if !hObject.is_invalid() { - let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - if guard.file_handles.remove(&hObject.0).is_some() { - debug_println!("File handle removed: {:#X}", hObject.0); + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + if let Some(handle) = state.file_handles.remove(&hObject.0) { + let _ = handle; + debug_println!("File handle removed: {:#X} ({})", hObject.0, handle.path.display()); + // Purposefully leave the file data itself in the cache. + // mwcceppc in particular will read the same file multiple times. } } @@ -305,20 +343,20 @@ extern "stdcall" fn hook_ReadFile( lpOverlapped: *mut OVERLAPPED, ) -> BOOL { if !hFile.is_invalid() { - let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - if let Some(file) = guard.file_handles.get_mut(&hFile.0) { + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + if let Some((handle, data)) = state.file_by_handle(hFile) { let count = min( nNumberOfBytesToRead, - u32::try_from(file.data.len() as u64 - file.pos).unwrap_or(u32::MAX), + u32::try_from(data.len() as u64 - handle.pos).unwrap_or(u32::MAX), ); unsafe { std::ptr::copy_nonoverlapping( - file.data.as_ptr().offset(file.pos as isize), + data.as_ptr().offset(handle.pos as isize), lpBuffer as *mut u8, count as usize, ); } - file.pos += count as u64; + handle.pos += count as u64; if !lpNumberOfBytesRead.is_null() { unsafe { *lpNumberOfBytesRead = count }; } @@ -362,22 +400,22 @@ extern "stdcall" fn hook_SetFilePointer( dwMoveMethod: SET_FILE_POINTER_MOVE_METHOD, ) -> u32 { if !hFile.is_invalid() { - let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state"); - if let Some(file) = guard.file_handles.get_mut(&hFile.0) { + let state = unsafe { GLOBAL_STATE.assume_init_mut() }; + if let Some((handle, data)) = state.file_by_handle(hFile) { let distance_to_move_high = if lpDistanceToMoveHigh.is_null() { 0 } else { unsafe { *lpDistanceToMoveHigh } }; let distance_to_move = lDistanceToMove as i64 | (distance_to_move_high as i64) << 32; - let file_size = file.data.len() as u64; + let file_size = data.len() as u64; let pos = min( match dwMoveMethod { FILE_BEGIN => distance_to_move as u64, - FILE_CURRENT => file.pos.saturating_add_signed(distance_to_move), + FILE_CURRENT => handle.pos.saturating_add_signed(distance_to_move), FILE_END => file_size.saturating_add_signed(distance_to_move), _ => panic!("SetFilePointer(): Unsupported move method {:#X}", dwMoveMethod.0), }, file_size, ); - file.pos = pos; + handle.pos = pos; debug_println!( "OVERRIDE SetFilePointer({:#X}, {:#X}, {:?}, {}) = {:#X}", hFile.0, @@ -406,6 +444,9 @@ extern "stdcall" fn hook_SetFilePointer( ret } +/// `IsDBCSLeadByte` hook. This normally uses the system codepage, override with Shift JIS behavior. +extern "stdcall" fn hook_IsDBCSLeadByte(TestChar: u8) -> BOOL { (TestChar & 0x80 != 0).into() } + /// Get the absolute path of a file. fn get_full_path(path: &CStr) -> Result { let mut buf = [0u8; 4096];