Experimental 64-bit host support

This commit is contained in:
2025-11-04 22:07:51 -07:00
parent 463686d01a
commit 3dd9fb77ff
64 changed files with 1993 additions and 844 deletions

View File

@@ -20,7 +20,7 @@ import os
import sys
import tempfile
from dataclasses import dataclass, field
from enum import IntEnum
from enum import Enum, IntEnum
from pathlib import Path
from typing import Iterable, List, Optional
@@ -50,6 +50,11 @@ if "LIBCLANG_PATH" in os.environ:
)
class Arch(str, Enum):
    """Supported target architectures for thunk generation."""

    X86 = "x86"
    X86_64 = "x86_64"
class CallingConv(IntEnum):
"""CXCallingConv enum values from clang-c/Index.h"""
@@ -91,13 +96,30 @@ def _get_function_calling_conv(func_type: CXType) -> CallingConv:
return CallingConv(_get_calling_conv(func_type))
class ArgClass(str, Enum):
    """How an argument is classified for passing.

    Only INT (register/stack integer class) and MEMORY are modeled here.
    """

    INT = "int"
    MEMORY = "memory"
@dataclass
class ArgInfo:
    """Architecture-independent description of one function argument."""

    # Clang type of the argument as written in the source.
    type: Type
    # Passing classification (INT vs MEMORY).
    arg_class: ArgClass
    # True when the value must be sign-extended if widened: signed integer
    # kinds and HANDLE-like typedefs (see _calculate_arg_info).
    sign_extended: bool
@dataclass
class ArgPlacement:
    """Concrete placement of one argument for a given arch/calling convention.

    NOTE(review): the hand-written __init__ below replaces the
    dataclass-generated one and initializes only size, slot_size, register
    and stack_offset — primitive, sign_extended and type are never assigned
    here. Confirm those fields are dead or set elsewhere.
    """

    size: int  # byte size of the canonical argument type
    slot_size: int  # bytes the argument occupies in its slot on this arch
    primitive: bool
    sign_extended: bool
    type: Type
    stack_offset: Optional[int] = None  # set when the argument lives on the stack
    register: Optional[str] = None  # set when the argument lives in a register

    def __init__(self, arg: ArgInfo, arch: Arch):
        # Placements start unassigned; compute_arg_layout later fills in
        # exactly one of register / stack_offset.
        self.size = arg.type.get_canonical().get_size()
        self.slot_size = _slot_size_for_arch(arg, arch)
        self.register = None
        self.stack_offset = None
@dataclass
@@ -108,7 +130,7 @@ class FuncInfo:
source_cc: CallingConv
target_cc: CallingConv
variadic: bool
return_type: Type
return_type: ArgInfo
args: List[ArgInfo] = field(default_factory=list)
@@ -118,7 +140,7 @@ class TypedefInfo:
source_cc: CallingConv
target_cc: CallingConv
variadic: bool
return_type: Type
return_type: ArgInfo
args: List[ArgInfo] = field(default_factory=list)
@@ -210,43 +232,146 @@ SIGNED_KINDS = [
]
def _calculate_arg_info(t: Type) -> ArgInfo:
    """Build the architecture-independent ArgInfo for one argument type.

    Every argument is currently classified as ArgClass.INT; values that must
    be sign-extended when widened (signed integer kinds and HANDLE-like
    typedefs) are flagged.

    Args:
        t: Clang type of the argument as written in the source.

    Returns:
        A populated ArgInfo for *t*.
    """
    canonical = t.get_canonical()
    # TODO: classify RECORD (struct/union) arguments as ArgClass.MEMORY once
    # struct passing is supported.
    arg_class = ArgClass.INT
    if canonical.kind == TypeKind.POINTER:
        pointee = canonical.get_pointee()
        if pointee.kind == TypeKind.POINTER:
            # Pointer-to-pointer crossing the thunk boundary is suspect;
            # flag it for manual review.
            print(f"Bugprone: Pointer to pointer ({_type_to_string(t)})")
    # Sign-extend signed integers and HANDLE-like typedefs
    is_sign_extended = canonical.kind in SIGNED_KINDS or _is_handle_typedef(t)
    return ArgInfo(
        arg_class=arg_class,
        sign_extended=is_sign_extended,
        type=t,
    )
def _collect_args(func_type: CXType) -> List[ArgInfo]:
    """Collect argument information for a function.

    The previous body retained a stale copy of the pre-refactor
    implementation, which constructed ArgInfo with keyword arguments the
    current dataclass no longer accepts (size/slot_size/primitive — a
    TypeError at runtime), compared ``canonical`` instead of
    ``canonical.kind`` against SIGNED_KINDS, and then appended a second
    ArgInfo via _calculate_arg_info. This is the intended implementation.

    Args:
        func_type: Clang function type whose parameters are inspected.

    Returns:
        One ArgInfo per declared parameter, in declaration order.
    """
    return [_calculate_arg_info(t) for t in func_type.argument_types()]
def collect_functions(tu: TranslationUnit, ns_filter: Optional[str]) -> List[FuncInfo]:
def _slot_size_for_arch(arg: ArgInfo, arch: Arch) -> int:
    """Return the slot size (in bytes) used to pass an argument on the given architecture."""
    canonical = arg.type.get_canonical()
    # Pointers always take a full machine word on the given architecture.
    if canonical.kind == TypeKind.POINTER:
        return 8 if arch == Arch.X86_64 else 4
    size = canonical.get_size()
    if arch == Arch.X86 and size <= 8:
        # 32-bit: values up to 4 bytes use one slot, up to 8 bytes use two.
        return 4 if size <= 4 else 8
    if arch == Arch.X86_64 and size <= 8:
        # 64-bit: everything that fits in a register uses one 8-byte slot.
        return 8
    raise NotImplementedError(
        f"Argument size {size} not supported for architecture {arch.value}"
    )
@dataclass
class ArgLayout:
    """Computed placements for a full argument list."""

    # One placement per argument, in declaration order.
    args: List[ArgPlacement]
    # Total number of bytes the stack-passed arguments occupy.
    stack_size: int
def compute_arg_layout(
    args: List[ArgInfo],
    cc: CallingConv,
    arch: Arch,
    stack_offset: int = 0,
    skip_args: int = 0,
) -> ArgLayout:
    """Compute how each argument is passed for the given calling convention and arch.

    Args:
        args: Architecture-independent descriptions of every argument.
        cc: Calling convention of the function being thunked.
        arch: Architecture the layout is computed for.
        stack_offset: Byte offset of the first stack argument (e.g. past a
            saved return address or saved registers).
        skip_args: Number of leading implicit arguments already consumed
            before these args; each is assumed to fit in one register.

    Returns:
        An ArgLayout with one ArgPlacement per entry in *args* and the total
        stack bytes consumed.
    """
    placements: List[ArgPlacement] = []
    stack_size = 0
    # Register order depends on (arch, cc); an empty list means all-stack.
    gpr_order: List[str] = []
    gpr_index = skip_args
    if arch == Arch.X86 and cc == CallingConv.X86_FASTCALL:
        gpr_order = ["ecx", "edx"]
    elif arch == Arch.X86_64 and cc == CallingConv.C:
        gpr_order = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]
    # Offset our stack based on number of extra arguments
    # We assume that every arg represented by skip_args fits in a register
    register_size = 8 if arch == Arch.X86_64 else 4
    consumed_stack = max(0, skip_args - len(gpr_order)) * register_size
    stack_offset += consumed_stack
    stack_size += consumed_stack

    def _push_stack(arg: ArgInfo) -> None:
        # Place *arg* at the current stack offset and advance by its slot size.
        nonlocal stack_offset
        nonlocal stack_size
        placement = ArgPlacement(arg, arch)
        placement.stack_offset = stack_offset
        placements.append(placement)
        stack_offset += placement.slot_size
        stack_size += placement.slot_size

    def _push_register(arg: ArgInfo) -> None:
        # Place *arg* in the next available GPR from gpr_order.
        nonlocal gpr_index
        placement = ArgPlacement(arg, arch)
        placement.register = gpr_order[gpr_index]
        placements.append(placement)
        gpr_index += 1

    # Special case for x86 fastcall: stop using registers if any spill onto the stack
    if arch == Arch.X86 and cc == CallingConv.X86_FASTCALL:
        stack_args_start = 0
        for i in range(min(len(gpr_order), len(args))):
            if gpr_index >= len(gpr_order):
                break
            arg = args[i]
            slot_size = _slot_size_for_arch(arg, arch)
            # Only 4-byte integer-class args are eligible for ECX/EDX.
            if arg.arg_class == ArgClass.INT and slot_size == 4:
                _push_register(arg)
                stack_args_start += 1
            else:
                break
        # Everything after the first ineligible argument goes on the stack.
        for i in range(stack_args_start, len(args)):
            _push_stack(args[i])
    else:
        for arg in args:
            slot_size = _slot_size_for_arch(arg, arch)
            if (
                arg.arg_class == ArgClass.INT
                and slot_size <= register_size
                and gpr_index < len(gpr_order)
            ):
                _push_register(arg)
            else:
                _push_stack(arg)
    return ArgLayout(args=placements, stack_size=stack_size)
def describe_arg_placement(placement: ArgPlacement) -> str:
    """Render a short human-readable summary of where an argument lives.

    Register placements render as ``reg[size]``, stack placements as
    ``stack+off[size]``; an unassigned placement is a ValueError.
    """
    reg = placement.register
    if reg is not None:
        return f"{reg}[{placement.slot_size}]"
    offset = placement.stack_offset
    if offset is not None:
        return f"stack+{offset}[{placement.slot_size}]"
    raise ValueError(f"Unassigned placement {placement}")
def collect_functions(
tu: TranslationUnit, ns_filter: Optional[str], arch: Arch
) -> List[FuncInfo]:
want_ns = ns_filter.split("::") if ns_filter else None
out: dict[str, FuncInfo] = {}
@@ -268,7 +393,7 @@ def collect_functions(tu: TranslationUnit, ns_filter: Optional[str]) -> List[Fun
source_cc=source_cc,
target_cc=_get_function_calling_conv(node.type),
variadic=node.type.is_function_variadic(),
return_type=node.type.get_result(),
return_type=_calculate_arg_info(node.type.get_result()),
args=_collect_args(node.type),
)
@@ -292,7 +417,7 @@ def _type_to_string(t: CXType) -> str:
return spelling
def collect_typedefs(tu: TranslationUnit) -> List[TypedefInfo]:
def collect_typedefs(tu: TranslationUnit, arch: Arch) -> List[TypedefInfo]:
"""Collect function pointer typedefs and type aliases from the translation unit."""
out: dict[str, TypedefInfo] = {}
@@ -309,17 +434,13 @@ def collect_typedefs(tu: TranslationUnit) -> List[TypedefInfo]:
if target_cc == CallingConv.DEFAULT:
return # No CC annotation; skip
variadic = func_type.is_function_variadic()
args = _collect_args(func_type)
return_type = func_type.get_result()
out[name] = TypedefInfo(
name=name,
source_cc=source_cc,
target_cc=target_cc,
variadic=variadic,
return_type=return_type,
args=args,
variadic=func_type.is_function_variadic(),
return_type=_calculate_arg_info(func_type.get_result()),
args=_collect_args(func_type),
)
def visit(node: Cursor) -> None:
@@ -372,17 +493,15 @@ def collect_variables(tu: TranslationUnit, ns_filter: Optional[str]) -> List[Var
return sorted(out.values(), key=lambda v: v.name)
def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str]):
def emit_cc_thunk32(f: FuncInfo | TypedefInfo, lines: List[str]):
if isinstance(f, TypedefInfo):
# Host-to-guest
target = "[eax+4]"
arg_off = 8
call_target = "[eax+4]"
align = 0
host_to_guest = True
elif isinstance(f, FuncInfo):
# Guest-to-host
target = f.mangled
arg_off = 4
call_target = f.mangled
align = 16
host_to_guest = False
@@ -391,49 +510,32 @@ def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str]):
assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, (
"Variadic functions must be cdecl"
)
lines.append(f"\tjmp {target}")
lines.append(f"\tjmp {call_target}")
return
# Compute argument stack offsets
offsets: List[int] = []
for arg in f.args:
offsets.append(arg_off)
arg_off += arg.slot_size
reg_indices: List[int] = []
if f.target_cc == CallingConv.X86_FASTCALL:
# Store the first two non-record 4-byte args in ECX/EDX for GCC/Clang x86 fastcall
if len(f.args) >= 1 and f.args[0].primitive and f.args[0].slot_size == 4:
reg_indices.append(0) # ECX
if len(f.args) >= 2 and f.args[1].primitive and f.args[1].slot_size == 4:
reg_indices.append(1) # EDX
elif f.target_cc == CallingConv.C or f.target_cc == CallingConv.X86_STDCALL:
# No register args for cdecl or stdcall
pass
else:
raise NotImplementedError(
f"Unsupported target calling convention {f.target_cc.name} for function {f.name}"
)
# Bytes we will push for the call (exclude args passed in registers)
stack_bytes = sum(
arg.slot_size for i, arg in enumerate(f.args) if i not in reg_indices
source_layout = compute_arg_layout(
f.args,
f.source_cc,
Arch.X86,
stack_offset=4,
skip_args=1 if host_to_guest else 0,
)
target_layout = compute_arg_layout(f.args, f.target_cc, Arch.X86)
# Get current TIB
# Get current TEB
if host_to_guest:
lines.append("\tmov ecx, gs:[currentThreadTeb@ntpoff]")
else:
lines.append("\tmov ecx, fs:[0x18]")
lines.append("\tmov ecx, fs:[TEB_SELF]")
# Swap fs and gs
lines.append("\tmov ax, fs")
lines.append("\tmov dx, word ptr [ecx+0xf98]")
lines.append("\tmov word ptr [ecx+0xf98], ax")
lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]")
lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax")
lines.append("\tmov fs, dx")
lines.append("\tmov ax, gs")
lines.append("\tmov dx, word ptr [ecx+0xf9a]")
lines.append("\tmov word ptr [ecx+0xf9a], ax")
lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]")
lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax")
lines.append("\tmov gs, dx")
# Store guest stack pointer in eax for arg access
@@ -442,48 +544,60 @@ def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str]):
# Swap stack pointer
lines.append("\tpush ebp")
lines.append("\tmov ebp, dword ptr [ecx+0xf9c]")
lines.append("\tmov dword ptr [ecx+0xf9c], esp")
lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]")
lines.append("\tmov dword ptr [ecx+TEB_SP], esp")
lines.append("\tmov esp, ebp")
# Allocate stack space for arguments
if stack_bytes > 0:
lines.append(f"\tsub esp, {stack_bytes}")
if target_layout.stack_size > 0:
lines.append(f"\tsub esp, {target_layout.stack_size}")
# Align stack if needed (must be done after allocating args)
if align > 0:
lines.append(f"\tand esp, ~{align - 1}")
# Copy args onto stack
cur_off = 0
for i, arg in enumerate(f.args):
if i in reg_indices:
# Copy args onto stack for the callee
for idx, target in enumerate(target_layout.args):
if target.stack_offset is None:
continue
base = offsets[i]
for part_off in range(0, arg.slot_size, 4):
lines.append(f"\tmov ecx, [eax+{base + part_off}]")
lines.append(f"\tmov [esp+{cur_off + part_off}], ecx")
cur_off += arg.slot_size
source = source_layout.args[idx]
if source.stack_offset is None:
raise NotImplementedError(
f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented"
)
if source.slot_size != target.slot_size:
raise NotImplementedError(
f"Argument {idx} requires size conversion {source.slot_size}->{target.slot_size}; not implemented"
)
for off in range(0, target.slot_size, 4):
lines.append(f"\tmov ecx, [eax+{source.stack_offset + off}]")
lines.append(f"\tmov [esp+{target.stack_offset + off}], ecx")
# Load args into registers as needed
if len(reg_indices) > 0:
i = reg_indices[0]
offset = offsets[i]
lines.append(f"\tmov ecx, [eax+{offset}]")
if len(reg_indices) > 1:
i = reg_indices[1]
offset = offsets[i]
lines.append(f"\tmov edx, [eax+{offset}]")
for idx, target in enumerate(target_layout.args):
if target.register is None:
continue
source = source_layout.args[idx]
if source.stack_offset is None:
raise NotImplementedError(
f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented"
)
lines.append(f"\tmov {target.register}, [eax+{source.stack_offset}]")
# Call into target
lines.append(f"\tcall {target}")
lines.append(f"\tcall {call_target}")
# Determine if we can clobber eax/edx
if f.return_type.kind == TypeKind.RECORD:
if f.return_type.arg_class != ArgClass.INT:
raise NotImplementedError(
f"Struct return type not supported for function {f.name}"
f"Unsupported return type class {f.return_type.arg_class.value} for function {f.name}"
)
return_size = f.return_type.get_size()
return_size = f.return_type.type.get_size()
save_eax = return_size > 0
save_edx = return_size > 4
if return_size > 8:
@@ -497,16 +611,16 @@ def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str]):
if save_edx:
lines.append("\tpush edx")
if host_to_guest:
lines.append("\tmov ecx, fs:[0x18]")
lines.append("\tmov ecx, fs:[TEB_SELF]")
else:
lines.append("\tmov ecx, gs:[currentThreadTeb@ntpoff]")
lines.append("\tmov ax, fs")
lines.append("\tmov dx, word ptr [ecx+0xf98]")
lines.append("\tmov word ptr [ecx+0xf98], ax")
lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]")
lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax")
lines.append("\tmov fs, dx")
lines.append("\tmov ax, gs")
lines.append("\tmov dx, word ptr [ecx+0xf9a]")
lines.append("\tmov word ptr [ecx+0xf9a], ax")
lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]")
lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax")
lines.append("\tmov gs, dx")
if save_edx:
lines.append("\tpop edx")
@@ -515,29 +629,293 @@ def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str]):
# Swap stack pointer
lines.append("\tmov esp, ebp") # Clean up arg space
lines.append("\tmov ebp, dword ptr [ecx+0xf9c]")
lines.append("\tmov dword ptr [ecx+0xf9c], esp")
lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]")
lines.append("\tmov dword ptr [ecx+TEB_SP], esp")
# Restore stack and frame pointer
lines.append("\tleave")
# Return to guest
if f.source_cc == CallingConv.X86_STDCALL:
ret_bytes = sum(arg.slot_size for arg in f.args)
elif f.source_cc == CallingConv.C:
ret_bytes = 0
else:
raise NotImplementedError(
f"Unsupported source calling convention {f.source_cc.name} for function {f.name}"
)
if ret_bytes > 0:
lines.append(f"\tret {ret_bytes}")
if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0:
lines.append(f"\tret {source_layout.stack_size}")
else:
lines.append("\tret")
def _x64_register_by_slot_size(reg: str, slot_size: int) -> str:
if slot_size == 8:
return reg
if reg in ["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp"]:
if slot_size == 4:
return f"e{reg[1:]}"
elif slot_size == 2:
return reg[1:]
elif slot_size == 1:
if reg in ["rax", "rbx", "rcx", "rdx"]:
return f"{reg[1]}l"
elif reg in ["rsi", "rdi"]:
return f"{reg[1]}il"
else:
return f"{reg[1]}pl"
if slot_size == 4:
return f"{reg}d"
if slot_size == 2:
return f"{reg}w"
if slot_size == 1:
return f"{reg}b"
raise NotImplementedError(f"Unsupported register {reg} for slot size {slot_size}")
def _x64_ptr_type_by_slot_size(slot_size) -> str:
if slot_size == 4:
return "dword ptr"
elif slot_size == 8:
return "qword ptr"
else:
raise ValueError(f"Unsupported slot size {slot_size}")
def emit_cc_thunk64(f: FuncInfo | TypedefInfo, lines: List[str]):
    """Emit assembly for a 64-bit-host calling-convention thunk into *lines*.

    A TypedefInfo produces a host-to-guest thunk (64-bit host code calling a
    32-bit guest function pointer held in edi); a FuncInfo produces a
    guest-to-host thunk (32-bit guest entry point calling the 64-bit host
    implementation ``f.mangled``). Both swap stacks and FS state through the
    TEB and switch bitness via the LJMP32/LJMP64 assembler macros.
    """
    if isinstance(f, TypedefInfo):
        # Host-to-guest
        call_target = "edi"
        align = 0
        host_to_guest = True
    elif isinstance(f, FuncInfo):
        # Guest-to-host
        call_target = f.mangled
        align = 16
        host_to_guest = False
    if f.variadic:
        # Variadic functions are not yet supported for calling convention conversion.
        assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, (
            "Variadic functions must be cdecl"
        )
        # Same convention on both sides: a plain tail-jump suffices.
        lines.append(f"\tjmp {call_target}")
        return
    # Source layout: how the caller passed the args (skipping the function
    # pointer argument for host-to-guest). Target layout: how the callee
    # expects them.
    source_layout = compute_arg_layout(
        f.args,
        f.source_cc,
        Arch.X86_64 if host_to_guest else Arch.X86,
        stack_offset=24 if host_to_guest else 16,
        skip_args=1 if host_to_guest else 0,
    )
    target_layout = compute_arg_layout(
        f.args, f.target_cc, Arch.X86 if host_to_guest else Arch.X86_64
    )
    if host_to_guest:
        lines.append(".code64")
        # Save rbx and rbp
        lines.append("\tpush rbx")
        lines.append("\tpush rbp")
        # Stash host stack in r10
        lines.append("\tmov r10, rsp")
        # Get current TEB
        lines.append("\tmov rcx, fs:[currentThreadTeb@tpoff]")
        # Save FS base
        lines.append("\trdfsbase r9")
        lines.append("\tmov qword ptr [rcx+TEB_FSBASE], r9")
        # Save RSP and load guest stack
        lines.append("\tmov rbp, qword ptr [rcx+TEB_SP]")
        lines.append("\tmov qword ptr [rcx+TEB_SP], rsp")
        lines.append("\tmov rsp, rbp")
        # Allocate stack space for arguments
        if target_layout.stack_size > 0:
            lines.append(f"\tsub rsp, {target_layout.stack_size}")
        # Align stack if needed (must be done after allocating args)
        if align > 0:
            lines.append(f"\tand rsp, ~{align - 1}")
        # Transfer arguments
        for i, target in enumerate(target_layout.args):
            if target.stack_offset is None:
                # 32-bit callee side is expected to be all-stack here.
                raise NotImplementedError(f"Unexpected register argument {target}")
            source = source_layout.args[i]
            if source.stack_offset is not None:
                # Load from the host stack (stashed in r10) into rax...
                # NOTE(review): the load pairs a source-sized ptr keyword with
                # a target-sized rax sub-register — confirm widths agree.
                ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                register = _x64_register_by_slot_size("rax", target.slot_size)
                lines.append(
                    f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                )
                # ...then store with target-sized operands.
                ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                register = _x64_register_by_slot_size("rax", target.slot_size)
            elif source.register is not None:
                # Argument already sits in a host register; store it directly.
                ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                register = _x64_register_by_slot_size(source.register, target.slot_size)
            else:
                raise ValueError(f"Argument {i} is not a register or stack offset")
            lines.append(f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}")
        # Jump to 32-bit mode
        lines.append("\tLJMP32")
        # Setup FS selector
        lines.append("\tmov ax, word ptr [ecx+TEB_FS_SEL]")
        lines.append("\tmov fs, ax")
        # Call into target
        lines.append(f"\tcall {call_target}")
        # Get current TEB
        lines.append("\tmov ecx, fs:[TEB_SELF]")
        # Jump back to 64-bit
        lines.append("\tLJMP64")
        # Sign extend return value if necessary
        if f.return_type.sign_extended:
            lines.append("\tcdqe")
        # Restore FS base
        lines.append("\tmov r9, qword ptr [rcx+TEB_FSBASE]")
        lines.append("\twrfsbase r9")
        # Restore host stack
        lines.append("\tmov rsp, qword ptr [rcx+TEB_SP]")
        lines.append("\tmov qword ptr [rcx+TEB_SP], rbp")
        # Restore rbp, rbx and return
        lines.append("\tpop rbp")
        lines.append("\tpop rbx")
        lines.append("\tret")
    else:
        lines.append(".code32")
        # Save registers
        lines.append("\tpush ebp")
        lines.append("\tpush esi")
        lines.append("\tpush edi")
        # Get current TEB
        lines.append("\tmov ecx, fs:[TEB_SELF]")
        # Save fs segment
        lines.append("\tmov di, fs")
        lines.append("\tmov word ptr [ecx+TEB_FS_SEL], di")
        # Jump back to 64-bit
        lines.append("\tLJMP64")
        # Restore FS base
        lines.append("\tmov r9, qword ptr [rcx+TEB_FSBASE]")
        lines.append("\twrfsbase r9")
        # Stash guest stack in r10
        lines.append("\tmov r10, rsp")
        # Restore host stack
        lines.append("\tmov rbp, qword ptr [rcx+TEB_SP]")
        lines.append("\tmov qword ptr [rcx+TEB_SP], rsp")
        lines.append("\tmov rsp, rbp")
        # Allocate stack space for arguments
        if target_layout.stack_size > 0:
            lines.append(f"\tsub rsp, {target_layout.stack_size}")
        # Align stack if needed (must be done after allocating args)
        if align > 0:
            lines.append(f"\tand rsp, ~{align - 1}")
        # Transfer args
        for i, target in enumerate(target_layout.args):
            arg = f.args[i]
            source = source_layout.args[i]
            if target.stack_offset is not None:
                if source.stack_offset is not None:
                    # Load from the guest stack (stashed in r10) via rax.
                    ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                    register = _x64_register_by_slot_size("rax", source.slot_size)
                    lines.append(
                        f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                    )
                    ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                    register = _x64_register_by_slot_size("rax", target.slot_size)
                elif source.register is not None:
                    ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                    register = _x64_register_by_slot_size(
                        source.register, target.slot_size
                    )
                else:
                    raise ValueError(f"Argument {i} is not a register or stack offset")
                lines.append(
                    f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}"
                )
            elif target.register is not None:
                ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                if source.slot_size == 4 and target.slot_size == 8:
                    if arg.sign_extended:
                        # 32->64 widening of a signed value: load 32 bits then
                        # sign-extend into the full register.
                        register = _x64_register_by_slot_size(
                            target.register, source.slot_size
                        )
                        lines.append(
                            f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                        )
                        lines.append(f"\tmovsxd {target.register}, {register}")
                    else:
                        # Unsigned widening: a 32-bit mov zero-extends the
                        # upper half implicitly.
                        register = _x64_register_by_slot_size(
                            target.register, source.slot_size
                        )
                        lines.append(
                            f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                        )
                elif source.slot_size == 8 and target.slot_size == 8:
                    lines.append(
                        f"\tmov {target.register}, {ptr_type} [r10+{source.stack_offset}]"
                    )
                else:
                    raise NotImplementedError(
                        f"Unsupported conversion from {source.slot_size} to {target.slot_size}"
                    )
        # Call into target
        lines.append(f"\tcall {call_target}")
        # Get current TEB
        lines.append("\tmov rcx, fs:[currentThreadTeb@tpoff]")
        # Restore host stack
        lines.append("\tmov rsp, qword ptr [rcx+TEB_SP]")
        lines.append("\tmov qword ptr [rcx+TEB_SP], rbp")
        # Jump to 32-bit mode
        lines.append("\tLJMP32")
        # Setup FS selector
        lines.append("\tmov di, word ptr [ecx+TEB_FS_SEL]")
        lines.append("\tmov fs, di")
        # Restore registers
        lines.append("\tpop edi")
        lines.append("\tpop esi")
        lines.append("\tpop ebp")
        # Return to guest
        if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0:
            # stdcall: callee pops its stack arguments.
            lines.append(f"\tret {source_layout.stack_size}")
        else:
            lines.append("\tret")
def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str], arch: Arch):
    """Emit the calling-convention thunk for *f*, dispatching on *arch*.

    Args:
        f: Function (guest-to-host) or typedef (host-to-guest) to thunk.
        lines: Output assembly lines, appended to in place.
        arch: Host architecture being generated for.

    Raises:
        NotImplementedError: if *arch* is not a supported architecture.
    """
    if arch == Arch.X86_64:
        return emit_cc_thunk64(f, lines)
    elif arch == Arch.X86:
        return emit_cc_thunk32(f, lines)
    # Previously an unknown arch silently emitted nothing; fail loudly instead.
    raise NotImplementedError(f"Unsupported architecture {arch}")
def emit_guest_to_host_thunks(
lines: List[str], dll: str, funcs: Iterable[FuncInfo]
lines: List[str], dll: str, funcs: Iterable[FuncInfo], arch: Arch
) -> None:
for f in funcs:
thunk = f"thunk_{dll}_{f.name}"
@@ -545,19 +923,24 @@ def emit_guest_to_host_thunks(
lines.append(
f"# {f.qualified_ns}::{f.name} (source_cc={f.source_cc.name}, target_cc={f.target_cc.name}, variadic={f.variadic})"
)
source_layout = compute_arg_layout(f.args, f.source_cc, Arch.X86)
target_layout = compute_arg_layout(f.args, f.target_cc, arch)
for i, arg in enumerate(f.args):
lines.append(
f"\t# Arg {i} (slot_size={arg.slot_size}, primitive={arg.primitive}, sign_extended={arg.sign_extended})"
)
details: List[str] = []
details.append(f"src={describe_arg_placement(source_layout.args[i])}")
details.append(f"dst={describe_arg_placement(target_layout.args[i])}")
details.append(f"class={arg.arg_class.value}")
details.append(f"sign_extended={arg.sign_extended}")
lines.append(f"\t# Arg {i} ({', '.join(details)})")
lines.append(f".globl {thunk}")
lines.append(f".type {thunk}, @function")
lines.append(f"{thunk}:")
emit_cc_thunk(f, lines)
emit_cc_thunk(f, lines, arch)
lines.append(f".size {thunk}, .-{thunk}")
def emit_host_to_guest_thunks(
lines: List[str], typedefs: Iterable[TypedefInfo]
lines: List[str], typedefs: Iterable[TypedefInfo], arch: Arch
) -> None:
for f in typedefs:
thunk = f"call_{f.name}"
@@ -565,14 +948,23 @@ def emit_host_to_guest_thunks(
lines.append(
f"# {f.name} (target_cc={f.target_cc.name}, variadic={f.variadic})"
)
source_layout = compute_arg_layout(f.args, f.source_cc, arch, skip_args=1)
target_layout = compute_arg_layout(f.args, f.target_cc, Arch.X86)
for i, arg in enumerate(f.args):
lines.append(
f"\t# Arg {i} (slot_size={arg.slot_size}, primitive={arg.primitive}, sign_extended={arg.sign_extended})"
)
details: List[str] = []
details.append(f"src={describe_arg_placement(source_layout.args[i])}")
details.append(f"dst={describe_arg_placement(target_layout.args[i])}")
details.append(f"class={arg.arg_class.value}")
details.append(f"sign_extended={arg.sign_extended}")
lines.append(f"\t# Arg {i} ({', '.join(details)})")
# details = []
# details.append(f"class={f.return_type.arg_class.value}")
# details.append(f"sign_extended={f.return_type.sign_extended}")
# lines.append(f"\t# Ret ({', '.join(details)})")
lines.append(f".weak {thunk}")
lines.append(f".type {thunk}, @function")
lines.append(f"{thunk}:")
emit_cc_thunk(f, lines)
emit_cc_thunk(f, lines, arch)
lines.append(f".size {thunk}, .-{thunk}")
@@ -581,6 +973,7 @@ def emit_header_mapping(
funcs: Iterable[FuncInfo],
typedefs: Iterable[TypedefInfo],
variables: Iterable[VarInfo],
arch: Arch,
) -> str:
guard = f"WIBO_GEN_{dll.upper()}_THUNKS_H"
lines: List[str] = []
@@ -619,16 +1012,18 @@ def emit_header_mapping(
type_str = _canonical_type_str(arg.type)
args.append(f"{type_str} arg{i}")
param_list = ", ".join(args)
return_type = _canonical_type_str(f.return_type)
if f.source_cc == CallingConv.X86_STDCALL:
cc_attr = "__attribute__((stdcall))"
return_type = _canonical_type_str(f.return_type.type)
if arch == Arch.X86_64:
cc_attr = ""
elif f.source_cc == CallingConv.X86_STDCALL:
cc_attr = "__attribute__((stdcall)) "
elif f.source_cc == CallingConv.C:
cc_attr = "__attribute__((cdecl))"
cc_attr = "__attribute__((cdecl)) "
else:
raise NotImplementedError(
f"Unsupported calling convention {f.source_cc.name} for function {f.name}"
)
lines.append(f"{cc_attr} {return_type} {thunk}({param_list});")
lines.append(f"{cc_attr}{return_type} {thunk}({param_list});")
# Host-to-guest thunk functions
for td in typedefs:
@@ -642,7 +1037,7 @@ def emit_header_mapping(
params.append(f"{type_str} arg{i}")
param_list = ", ".join(params)
return_type = _type_to_string(td.return_type)
return_type = _type_to_string(td.return_type.type)
lines.append(f"{return_type} {thunk}({param_list});")
lines.append("#ifdef __cplusplus\n}\n#endif")
@@ -672,7 +1067,7 @@ def main() -> int:
ap.add_argument(
"--namespace", dest="ns", default=None, help="Namespace filter, e.g. kernel32"
)
ap.add_argument("--arch", choices=["x86"], default="x86")
ap.add_argument("--arch", choices=["x86", "x86_64"], default="x86")
ap.add_argument(
"--out-asm", type=Path, required=True, help="Output assembly file (.S)"
)
@@ -682,10 +1077,17 @@ def main() -> int:
ap.add_argument("-I", dest="incs", action="append", default=[])
args = ap.parse_args()
if args.arch == "x86":
arch = Arch.X86
elif args.arch == "x86_64":
arch = Arch.X86_64
else:
raise ValueError(f"Unsupported architecture: {args.arch}")
target = "i686-pc-linux-gnu" if args.arch == "x86" else "x86_64-pc-linux-gnu"
tu = parse_tu(args.headers, args.incs, target)
funcs = collect_functions(tu, args.ns)
typedefs = collect_typedefs(tu)
funcs = collect_functions(tu, args.ns, arch)
typedefs = collect_typedefs(tu, arch)
variables = collect_variables(tu, args.ns)
if not funcs and not typedefs and not variables:
@@ -694,15 +1096,15 @@ def main() -> int:
lines: List[str] = []
lines.append("# Auto-generated thunks; DO NOT EDIT.")
lines.append(".intel_syntax noprefix")
lines.append('#include "macros.S"')
lines.append('.section .note.GNU-stack, "", @progbits')
lines.append(".text")
emit_guest_to_host_thunks(lines, args.dll, funcs)
emit_host_to_guest_thunks(lines, typedefs)
emit_guest_to_host_thunks(lines, args.dll, funcs, arch)
emit_host_to_guest_thunks(lines, typedefs, arch)
asm = "\n".join(lines) + "\n"
hdr = emit_header_mapping(args.dll, funcs, typedefs, variables)
hdr = emit_header_mapping(args.dll, funcs, typedefs, variables, arch)
args.out_asm.parent.mkdir(parents=True, exist_ok=True)
args.out_hdr.parent.mkdir(parents=True, exist_ok=True)