Hook `IsDBCSLeadByte`; cleanup & improvements
This commit is contained in:
parent
9fe2bd4088
commit
808280cb10
|
@ -8,18 +8,18 @@ version = "1.0.75"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.33"
|
||||
|
@ -27,22 +27,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"packed_simd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.11.0"
|
||||
name = "libm"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
|
||||
|
||||
[[package]]
|
||||
name = "memexec"
|
||||
|
@ -50,15 +42,40 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c"
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "sjiswrap"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"encoding_rs",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"memexec",
|
||||
"rustc-hash",
|
||||
"windows",
|
||||
]
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ name = "sjiswrap"
|
|||
description = "UTF-8 to Shift JIS wrapper for old compilers."
|
||||
authors = ["Luke Street <luke@street.dev>"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
edition = "2021"
|
||||
publish = false
|
||||
repository = "https://github.com/encounter/sjiswrap"
|
||||
|
@ -22,15 +22,15 @@ debug = []
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.72"
|
||||
encoding_rs = "0.8.32"
|
||||
itertools = "0.11.0"
|
||||
lazy_static = "1.4.0"
|
||||
encoding_rs = { version = "0.8.32", features = ["simd-accel", "fast-kanji-encode"] }
|
||||
memexec = { version = "0.2.0", features = ["hook"] }
|
||||
rustc-hash = "1.1.0"
|
||||
|
||||
[dependencies.windows]
|
||||
version = "0.48.0"
|
||||
features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_Globalization",
|
||||
"Win32_Security",
|
||||
"Win32_Storage_FileSystem",
|
||||
"Win32_System_Environment",
|
||||
|
|
195
src/main.rs
195
src/main.rs
|
@ -2,19 +2,19 @@
|
|||
use std::{
|
||||
borrow::Cow,
|
||||
cmp::min,
|
||||
collections::HashMap,
|
||||
ffi::{c_char, c_void, CStr, CString, OsString},
|
||||
collections::{hash_map::Entry, HashMap},
|
||||
ffi::{c_char, c_void, CStr, CString, OsStr, OsString},
|
||||
fs::File,
|
||||
io::Read,
|
||||
iter::{Cloned, Peekable},
|
||||
path::PathBuf,
|
||||
mem::MaybeUninit,
|
||||
path::{Path, PathBuf},
|
||||
process::exit,
|
||||
sync::Mutex,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use encoding_rs::SHIFT_JIS;
|
||||
use lazy_static::lazy_static;
|
||||
use rustc_hash::FxHashMap;
|
||||
use windows::{
|
||||
core::{PCSTR, PCWSTR},
|
||||
Win32::{
|
||||
|
@ -33,17 +33,20 @@ use windows::{
|
|||
};
|
||||
|
||||
/// Whether to hook and encode a file.
|
||||
fn is_text_file(path: &str) -> bool {
|
||||
path.ends_with(".c")
|
||||
|| path.ends_with(".cc")
|
||||
|| path.ends_with(".cp")
|
||||
|| path.ends_with(".cpp")
|
||||
|| path.ends_with(".cxx")
|
||||
|| path.ends_with(".h")
|
||||
|| path.ends_with(".hh")
|
||||
|| path.ends_with(".hp")
|
||||
|| path.ends_with(".hpp")
|
||||
|| path.ends_with(".hxx")
|
||||
/// Whether to hook and encode a file.
///
/// Returns `true` when the extension of `path` is one of the known C/C++
/// source or header extensions. The comparison ignores ASCII case because
/// Windows paths are case-insensitive, so `FOO.H` and `main.CPP` qualify too.
/// Paths without an extension are never treated as text files.
fn is_text_file(path: &Path) -> bool {
    /// Extensions (without the leading dot) that are treated as source text.
    const TEXT_EXTENSIONS: [&str; 10] =
        ["c", "cc", "cp", "cpp", "cxx", "h", "hh", "hp", "hpp", "hxx"];

    path.extension().map_or(false, |ext| {
        TEXT_EXTENSIONS.iter().any(|&known| ext.eq_ignore_ascii_case(known))
    })
}
|
||||
|
||||
macro_rules! debug_println {
|
||||
|
@ -65,6 +68,8 @@ fn main() -> Result<()> {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
unsafe { GLOBAL_STATE = MaybeUninit::new(GlobalState::default()) };
|
||||
|
||||
let path = PathBuf::from(&args[1]);
|
||||
let parent = CString::new(
|
||||
path.parent()
|
||||
|
@ -95,13 +100,14 @@ fn main() -> Result<()> {
|
|||
hooks.insert("kernel32.dll!CloseHandle".into(), hook_CloseHandle as *const c_void);
|
||||
hooks.insert("kernel32.dll!ReadFile".into(), hook_ReadFile as *const c_void);
|
||||
hooks.insert("kernel32.dll!SetFilePointer".into(), hook_SetFilePointer as *const c_void);
|
||||
hooks.insert("kernel32.dll!IsDBCSLeadByte".into(), hook_IsDBCSLeadByte as *const c_void);
|
||||
unsafe { memexec::memexec_exe_with_hooks(&buf, &hooks) }.expect("Failed to execute");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// File that has been read into memory and encoded.
|
||||
struct FileHandle {
|
||||
data: Vec<u8>,
|
||||
path: PathBuf,
|
||||
pos: u64,
|
||||
}
|
||||
|
||||
|
@ -109,17 +115,24 @@ struct FileHandle {
|
|||
#[derive(Default)]
|
||||
struct GlobalState {
|
||||
cmdline: Option<CString>,
|
||||
file_handles: HashMap<isize, FileHandle>,
|
||||
encoded_files: FxHashMap<PathBuf, Vec<u8>>,
|
||||
file_handles: FxHashMap<isize, FileHandle>,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref GLOBAL_STATE: Mutex<GlobalState> = Default::default();
|
||||
impl GlobalState {
|
||||
fn file_by_handle(&mut self, handle: HANDLE) -> Option<(&mut FileHandle, &[u8])> {
|
||||
self.file_handles
|
||||
.get_mut(&handle.0)
|
||||
.and_then(|file| self.encoded_files.get(&file.path).map(|data| (file, data.as_slice())))
|
||||
}
|
||||
}
|
||||
|
||||
static mut GLOBAL_STATE: MaybeUninit<GlobalState> = MaybeUninit::uninit();
|
||||
|
||||
/// `GetCommandLineA` hook. Skips our own executable name and replaces the subprocess path with an absolute path.
|
||||
extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
|
||||
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
if let Some(str) = &guard.cmdline {
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
if let Some(str) = &state.cmdline {
|
||||
return PCSTR(str.as_ptr() as *const u8);
|
||||
}
|
||||
|
||||
|
@ -179,8 +192,8 @@ extern "stdcall" fn hook_GetCommandLineA() -> PCSTR {
|
|||
cmdline.extend(iter);
|
||||
}
|
||||
cmdline.push(0);
|
||||
guard.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) });
|
||||
PCSTR(guard.cmdline.as_ref().unwrap().as_ptr() as *const u8)
|
||||
state.cmdline = Some(unsafe { CString::from_vec_with_nul_unchecked(cmdline) });
|
||||
PCSTR(state.cmdline.as_ref().unwrap().as_ptr() as *const u8)
|
||||
}
|
||||
|
||||
/// `GetCommandLineW` hook. Currently unsupported.
|
||||
|
@ -188,6 +201,52 @@ extern "stdcall" fn hook_GetCommandLineW() -> PCSTR {
|
|||
panic!("GetCommandLineW() is not supported");
|
||||
}
|
||||
|
||||
/// Read a file into memory and encode it as Shift JIS.
|
||||
fn encode_file(handle: HANDLE, path: &Path) {
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
state.file_handles.insert(handle.0, FileHandle { path: path.to_path_buf(), pos: 0 });
|
||||
let Entry::Vacant(entry) = state.encoded_files.entry(path.to_path_buf()) else {
|
||||
debug_println!("File already cached: {}", path.display());
|
||||
return;
|
||||
};
|
||||
|
||||
let mut filesize_high = 0u32;
|
||||
let mut filesize = unsafe { GetFileSize(handle, Some(&mut filesize_high)) } as u64;
|
||||
filesize |= (filesize_high as u64) << 32;
|
||||
if filesize >= u32::MAX as u64 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut data = vec![0u8; filesize as usize];
|
||||
let mut bytes_read = 0u32;
|
||||
if !unsafe {
|
||||
ReadFile(
|
||||
handle,
|
||||
Some(data.as_mut_ptr() as *mut c_void),
|
||||
data.len() as u32,
|
||||
Some(&mut bytes_read),
|
||||
None,
|
||||
)
|
||||
}
|
||||
.as_bool()
|
||||
|| bytes_read != filesize as u32
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let str = unsafe { std::str::from_utf8_unchecked(&data) };
|
||||
let (encoded, _, _) = SHIFT_JIS.encode(str);
|
||||
match encoded {
|
||||
Cow::Borrowed(_) => {
|
||||
// No modifications were made, use the original data
|
||||
entry.insert(data);
|
||||
}
|
||||
Cow::Owned(data) => {
|
||||
entry.insert(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `CreateFileA` hook. If it's a text file, read it into memory and encode it as Shift-JIS.
|
||||
extern "stdcall" fn hook_CreateFileA(
|
||||
lpFileName: PCSTR,
|
||||
|
@ -212,44 +271,20 @@ extern "stdcall" fn hook_CreateFileA(
|
|||
.unwrap_or(INVALID_HANDLE_VALUE);
|
||||
let err = unsafe { GetLastError() };
|
||||
|
||||
let path = unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }.to_string_lossy();
|
||||
let path = PathBuf::from(
|
||||
unsafe { CStr::from_ptr(lpFileName.as_ptr() as *const c_char) }
|
||||
.to_str()
|
||||
.expect("CreateFileA(): Path is not valid UTF-8"),
|
||||
);
|
||||
if !ret.is_invalid() && dwDesiredAccess == GENERIC_READ && is_text_file(&path) {
|
||||
let mut filesize_high = 0u32;
|
||||
let mut filesize = unsafe { GetFileSize(ret, Some(&mut filesize_high)) } as u64;
|
||||
filesize |= (filesize_high as u64) << 32;
|
||||
|
||||
if filesize < u32::MAX as u64 {
|
||||
let mut data = vec![0u8; filesize as usize];
|
||||
let mut bytes_read = 0u32;
|
||||
if unsafe {
|
||||
ReadFile(
|
||||
ret,
|
||||
Some(data.as_mut_ptr() as *mut c_void),
|
||||
filesize as u32,
|
||||
Some(&mut bytes_read),
|
||||
None,
|
||||
)
|
||||
}
|
||||
.as_bool()
|
||||
&& bytes_read == filesize as u32
|
||||
{
|
||||
if let Ok(str) = std::str::from_utf8(&data) {
|
||||
let (encoded, _, _) = SHIFT_JIS.encode(str);
|
||||
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
match encoded {
|
||||
Cow::Borrowed(_) => {
|
||||
// No modifications were made, use the original data
|
||||
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
|
||||
}
|
||||
Cow::Owned(data) => {
|
||||
guard.file_handles.insert(ret.0, FileHandle { data, pos: 0 });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
encode_file(ret, &path);
|
||||
}
|
||||
debug_println!("CreateFileA({}, {:#X}) = {:#X}", path, dwDesiredAccess.0, ret.0 as u32);
|
||||
debug_println!(
|
||||
"CreateFileA({}, {:#X}) = {:#X}",
|
||||
path.display(),
|
||||
dwDesiredAccess.0,
|
||||
ret.0 as u32
|
||||
);
|
||||
unsafe { SetLastError(err) };
|
||||
ret
|
||||
}
|
||||
|
@ -270,10 +305,10 @@ extern "stdcall" fn hook_CreateFileW(
|
|||
/// `GetFileSize` hook. If the file was read into memory, return that size instead.
|
||||
extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) -> u32 {
|
||||
if !hFile.is_invalid() {
|
||||
let guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
if let Some(file) = guard.file_handles.get(&hFile.0) {
|
||||
debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, file.data.len() as u32);
|
||||
return file.data.len() as u32;
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
if let Some((_handle, data)) = state.file_by_handle(hFile) {
|
||||
debug_println!("OVERRIDE: GetFileSize({:#X}) = {:#X}", hFile.0, data.len() as u32);
|
||||
return data.len() as u32;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -285,9 +320,12 @@ extern "stdcall" fn hook_GetFileSize(hFile: HANDLE, lpFileSizeHigh: *mut u32) ->
|
|||
/// `CloseHandle` hook. If the file was read into memory, free it.
|
||||
extern "stdcall" fn hook_CloseHandle(hObject: HANDLE) -> BOOL {
|
||||
if !hObject.is_invalid() {
|
||||
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
if guard.file_handles.remove(&hObject.0).is_some() {
|
||||
debug_println!("File handle removed: {:#X}", hObject.0);
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
if let Some(handle) = state.file_handles.remove(&hObject.0) {
|
||||
let _ = handle;
|
||||
debug_println!("File handle removed: {:#X} ({})", hObject.0, handle.path.display());
|
||||
// Purposefully leave the file data itself in the cache.
|
||||
// mwcceppc in particular will read the same file multiple times.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -305,20 +343,20 @@ extern "stdcall" fn hook_ReadFile(
|
|||
lpOverlapped: *mut OVERLAPPED,
|
||||
) -> BOOL {
|
||||
if !hFile.is_invalid() {
|
||||
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
if let Some(file) = guard.file_handles.get_mut(&hFile.0) {
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
if let Some((handle, data)) = state.file_by_handle(hFile) {
|
||||
let count = min(
|
||||
nNumberOfBytesToRead,
|
||||
u32::try_from(file.data.len() as u64 - file.pos).unwrap_or(u32::MAX),
|
||||
u32::try_from(data.len() as u64 - handle.pos).unwrap_or(u32::MAX),
|
||||
);
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(
|
||||
file.data.as_ptr().offset(file.pos as isize),
|
||||
data.as_ptr().offset(handle.pos as isize),
|
||||
lpBuffer as *mut u8,
|
||||
count as usize,
|
||||
);
|
||||
}
|
||||
file.pos += count as u64;
|
||||
handle.pos += count as u64;
|
||||
if !lpNumberOfBytesRead.is_null() {
|
||||
unsafe { *lpNumberOfBytesRead = count };
|
||||
}
|
||||
|
@ -362,22 +400,22 @@ extern "stdcall" fn hook_SetFilePointer(
|
|||
dwMoveMethod: SET_FILE_POINTER_MOVE_METHOD,
|
||||
) -> u32 {
|
||||
if !hFile.is_invalid() {
|
||||
let mut guard = GLOBAL_STATE.lock().expect("Failed to lock global state");
|
||||
if let Some(file) = guard.file_handles.get_mut(&hFile.0) {
|
||||
let state = unsafe { GLOBAL_STATE.assume_init_mut() };
|
||||
if let Some((handle, data)) = state.file_by_handle(hFile) {
|
||||
let distance_to_move_high =
|
||||
if lpDistanceToMoveHigh.is_null() { 0 } else { unsafe { *lpDistanceToMoveHigh } };
|
||||
let distance_to_move = lDistanceToMove as i64 | (distance_to_move_high as i64) << 32;
|
||||
let file_size = file.data.len() as u64;
|
||||
let file_size = data.len() as u64;
|
||||
let pos = min(
|
||||
match dwMoveMethod {
|
||||
FILE_BEGIN => distance_to_move as u64,
|
||||
FILE_CURRENT => file.pos.saturating_add_signed(distance_to_move),
|
||||
FILE_CURRENT => handle.pos.saturating_add_signed(distance_to_move),
|
||||
FILE_END => file_size.saturating_add_signed(distance_to_move),
|
||||
_ => panic!("SetFilePointer(): Unsupported move method {:#X}", dwMoveMethod.0),
|
||||
},
|
||||
file_size,
|
||||
);
|
||||
file.pos = pos;
|
||||
handle.pos = pos;
|
||||
debug_println!(
|
||||
"OVERRIDE SetFilePointer({:#X}, {:#X}, {:?}, {}) = {:#X}",
|
||||
hFile.0,
|
||||
|
@ -406,6 +444,9 @@ extern "stdcall" fn hook_SetFilePointer(
|
|||
ret
|
||||
}
|
||||
|
||||
/// `IsDBCSLeadByte` hook. This normally uses the system codepage, override with Shift JIS behavior.
|
||||
extern "stdcall" fn hook_IsDBCSLeadByte(TestChar: u8) -> BOOL { (TestChar & 0x80 != 0).into() }
|
||||
|
||||
/// Get the absolute path of a file.
|
||||
fn get_full_path(path: &CStr) -> Result<CString> {
|
||||
let mut buf = [0u8; 4096];
|
||||
|
|
Loading…
Reference in New Issue