#!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.10" # dependencies = ["clang==17.0.6"] # /// """ Generate Windows ABI trampolines by scanning C++ prototypes using libclang. This emits x86 trampolines for guest<->host calls. """ if __name__ == "__main__": import script_venv script_venv.bootstrap_venv(__file__) import argparse import ctypes import os import sys import tempfile from dataclasses import dataclass, field from enum import Enum, IntEnum from pathlib import Path from typing import Iterable, List, Optional from clang.cindex import ( Config, Cursor, CursorKind, Index, StorageClass, TranslationUnit, Type, TypeKind, conf, ) from clang.cindex import Type as CXType # Allow libclang path to be specified via environment variable if "LIBCLANG_PATH" in os.environ: libclang_path = os.environ["LIBCLANG_PATH"] if os.path.isfile(libclang_path): Config.set_library_file(libclang_path) elif os.path.isdir(libclang_path): Config.set_library_path(libclang_path) else: sys.stderr.write( f"Warning: LIBCLANG_PATH={libclang_path} is not a file or directory\n" ) class Arch(str, Enum): X86 = "x86" X86_64 = "x86_64" class CallingConv(IntEnum): """CXCallingConv enum values from clang-c/Index.h""" DEFAULT = 0 C = 1 X86_STDCALL = 2 X86_FASTCALL = 3 X86_THISCALL = 4 X86_PASCAL = 5 AAPCS = 6 AAPCS_VFP = 7 X86_REGCALL = 8 INTELOCLBICC = 9 WIN64 = 10 X86_64_WIN64 = 11 X86_64_SYSV = 12 X86_VECTORCALL = 13 SWIFT = 14 PRESERVEMOST = 15 PRESERVEALL = 16 AARCH64_VECTORCALL = 17 SWIFTASYNC = 18 AARCH64_SVEPCS = 19 M68K_RTD = 20 INVALID = 100 UNEXPOSED = 200 # Register the clang_getFunctionTypeCallingConv function _get_calling_conv = conf.lib.clang_getFunctionTypeCallingConv _get_calling_conv.argtypes = [CXType] _get_calling_conv.restype = ctypes.c_int def _get_function_calling_conv(func_type: CXType) -> CallingConv: """ Get the calling convention of a function type. """ return CallingConv(_get_calling_conv(func_type)) class ArgClass(str, Enum): INT = "int" MEMORY = "memory" @dataclass class ArgInfo: type: Type arg_class: ArgClass sign_extended: bool @dataclass class ArgPlacement: size: int slot_size: int stack_offset: Optional[int] = None register: Optional[str] = None def __init__(self, arg: ArgInfo, arch: Arch): self.size = arg.type.get_canonical().get_size() self.slot_size = _slot_size_for_arch(arg, arch) self.register = None self.stack_offset = None @dataclass class FuncInfo: qualified_ns: str name: str mangled: str source_cc: CallingConv target_cc: CallingConv variadic: bool return_type: ArgInfo args: List[ArgInfo] = field(default_factory=list) @dataclass class TypedefInfo: name: str source_cc: CallingConv target_cc: CallingConv variadic: bool return_type: ArgInfo args: List[ArgInfo] = field(default_factory=list) @dataclass class VarInfo: qualified_ns: str name: str def parse_tu( headers: List[str], include_dirs: List[str], target: str ) -> TranslationUnit: # Construct a tiny TU that includes the requested headers tu_source = "\n".join([f'#include "{h}"' for h in headers]) + "\n" with tempfile.NamedTemporaryFile("w", suffix=".cpp") as tf: tf.write(tu_source) tf.flush() args = [ "-x", "c++", "-std=c++17", "-target", target, "-DWIBO_CODEGEN=1", ] + [arg for inc in include_dirs for arg in ("-I", inc)] index = Index.create() tu = index.parse( tf.name, args=args, options=TranslationUnit.PARSE_SKIP_FUNCTION_BODIES ) for d in tu.diagnostics: if d.severity >= d.Warning: sys.stderr.write(str(d) + "\n") return tu def _cursor_namespace(cursor: Cursor) -> List[str]: ns: List[str] = [] c = cursor while c is not None and c.kind != CursorKind.TRANSLATION_UNIT: if c.kind == CursorKind.NAMESPACE and c.spelling: ns.append(c.spelling) c = c.semantic_parent return list(reversed(ns)) def _source_cc_from_annotations(func: Cursor) -> CallingConv: for child in func.get_children(): if child.kind == CursorKind.ANNOTATE_ATTR: if child.spelling == "CC:fastcall": return CallingConv.X86_FASTCALL elif child.spelling == "CC:stdcall": return CallingConv.X86_STDCALL elif child.spelling == "CC:cdecl": return CallingConv.C return CallingConv.DEFAULT def _is_handle_typedef(arg_type: CXType) -> bool: """Check if a type is a HANDLE-like typedef (HWND, HINSTANCE, etc.).""" t = arg_type # Trace through ELABORATED and TYPEDEF to find the original typedef name while t.kind == TypeKind.ELABORATED or t.kind == TypeKind.TYPEDEF: if t.kind == TypeKind.TYPEDEF: decl = t.get_declaration() name = decl.spelling # Windows HANDLE types conventionally start with 'H' if name and name.startswith("H") and name.isupper(): return True t = decl.underlying_typedef_type elif t.kind == TypeKind.ELABORATED: named = t.get_named_type() if named is None: break t = named else: break return False SIGNED_KINDS = [ TypeKind.SCHAR, TypeKind.CHAR_S, TypeKind.SHORT, TypeKind.INT, TypeKind.LONG, TypeKind.LONGLONG, TypeKind.INT128, ] def _calculate_arg_info(t: Type) -> ArgInfo: canonical = t.get_canonical() # if canonical.kind == TypeKind.RECORD: # arg_class = ArgClass.MEMORY # else: arg_class = ArgClass.INT if canonical.kind == TypeKind.POINTER: pointee = canonical.get_pointee() if pointee.kind == TypeKind.POINTER: print(f"Bugprone: Pointer to pointer ({_type_to_string(t)})") # Sign-extend signed integers and HANDLE-like typedefs is_sign_extended = canonical.kind in SIGNED_KINDS or _is_handle_typedef(t) return ArgInfo( arg_class=arg_class, sign_extended=is_sign_extended, type=t, ) def _collect_args(func_type: CXType) -> List[ArgInfo]: """Collect argument information for a function.""" args: List[ArgInfo] = [] for t in func_type.argument_types(): args.append(_calculate_arg_info(t)) return args def _slot_size_for_arch(arg: ArgInfo, arch: Arch) -> int: """Return the slot size (in bytes) used to pass an argument on the given architecture.""" canonical = arg.type.get_canonical() if canonical.kind == TypeKind.POINTER: return 8 if arch == Arch.X86_64 else 4 size = canonical.get_size() if arch == Arch.X86: if size <= 4: return 4 if size <= 8: return 8 elif arch == Arch.X86_64: if size <= 8: return 8 raise NotImplementedError( f"Argument size {size} not supported for architecture {arch.value}" ) @dataclass class ArgLayout: args: List[ArgPlacement] stack_size: int def compute_arg_layout( args: List[ArgInfo], cc: CallingConv, arch: Arch, stack_offset: int = 0, skip_args: int = 0, ) -> ArgLayout: """Compute how each argument is passed for the given calling convention and arch.""" placements: List[ArgPlacement] = [] stack_size = 0 gpr_order: List[str] = [] gpr_index = skip_args if arch == Arch.X86 and cc == CallingConv.X86_FASTCALL: gpr_order = ["ecx", "edx"] elif arch == Arch.X86_64 and cc == CallingConv.C: gpr_order = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"] # Offset our stack based on number of extra arguments # We assume that every arg represented by skip_args fits in a register register_size = 8 if arch == Arch.X86_64 else 4 consumed_stack = max(0, skip_args - len(gpr_order)) * register_size stack_offset += consumed_stack stack_size += consumed_stack def _push_stack(arg: ArgInfo) -> None: nonlocal stack_offset nonlocal stack_size placement = ArgPlacement(arg, arch) placement.stack_offset = stack_offset placements.append(placement) stack_offset += placement.slot_size stack_size += placement.slot_size def _push_register(arg: ArgInfo) -> None: nonlocal gpr_index placement = ArgPlacement(arg, arch) placement.register = gpr_order[gpr_index] placements.append(placement) gpr_index += 1 # Special case for x86 fastcall: stop using registers if any spill onto the stack if arch == Arch.X86 and cc == CallingConv.X86_FASTCALL: stack_args_start = 0 for i in range(min(len(gpr_order), len(args))): if gpr_index >= len(gpr_order): break arg = args[i] slot_size = _slot_size_for_arch(arg, arch) if arg.arg_class == ArgClass.INT and slot_size == 4: _push_register(arg) stack_args_start += 1 else: break for i in range(stack_args_start, len(args)): _push_stack(args[i]) else: for arg in args: slot_size = _slot_size_for_arch(arg, arch) if ( arg.arg_class == ArgClass.INT and slot_size <= register_size and gpr_index < len(gpr_order) ): _push_register(arg) else: _push_stack(arg) return ArgLayout(args=placements, stack_size=stack_size) def describe_arg_placement(placement: ArgPlacement) -> str: if placement.register is not None: return f"{placement.register}[{placement.slot_size}]" if placement.stack_offset is not None: return f"stack+{placement.stack_offset}[{placement.slot_size}]" raise ValueError(f"Unassigned placement {placement}") def collect_functions( tu: TranslationUnit, ns_filter: Optional[str], arch: Arch ) -> List[FuncInfo]: want_ns = ns_filter.split("::") if ns_filter else None out: dict[str, FuncInfo] = {} def visit(node: Cursor) -> None: if node.kind == CursorKind.FUNCTION_DECL: ns_parts = _cursor_namespace(node) if want_ns is not None and ns_parts != want_ns: return name = node.spelling if not name: return source_cc = _source_cc_from_annotations(node) if source_cc == CallingConv.DEFAULT: return # No CC annotation; skip out[name] = FuncInfo( qualified_ns="::".join(ns_parts), name=name, mangled=node.mangled_name or name, source_cc=source_cc, target_cc=_get_function_calling_conv(node.type), variadic=node.type.is_function_variadic(), return_type=_calculate_arg_info(node.type.get_result()), args=_collect_args(node.type), ) # Recurse into children if node.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE): for c in node.get_children(): visit(c) if tu.cursor is not None: visit(tu.cursor) return sorted(out.values(), key=lambda f: f.name) def _type_to_string(t: CXType) -> str: """Convert a CXType to a C type string.""" spelling = t.spelling # Clean up common type spellings spelling = ( spelling.replace("struct ", "").replace("union ", "").replace("enum ", "") ) return spelling def collect_typedefs(tu: TranslationUnit, arch: Arch) -> List[TypedefInfo]: """Collect function pointer typedefs and type aliases from the translation unit.""" out: dict[str, TypedefInfo] = {} def process_function_pointer_type( name: str, node: Cursor, func_type: CXType ) -> None: """Process a function pointer type and add it to the output.""" if not name: return # Determine calling convention source_cc = _get_function_calling_conv(func_type) target_cc = _source_cc_from_annotations(node) if target_cc == CallingConv.DEFAULT: return # No CC annotation; skip out[name] = TypedefInfo( name=name, source_cc=source_cc, target_cc=target_cc, variadic=func_type.is_function_variadic(), return_type=_calculate_arg_info(func_type.get_result()), args=_collect_args(func_type), ) def visit(node: Cursor) -> None: if node.kind == CursorKind.TYPEDEF_DECL: name = node.spelling if not name: return underlying = node.underlying_typedef_type if underlying.kind == TypeKind.POINTER: pointee = underlying.get_pointee() if pointee.kind == TypeKind.FUNCTIONPROTO: process_function_pointer_type(name, node, pointee) # Recurse into children if node.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE): for c in node.get_children(): visit(c) if tu.cursor is not None: visit(tu.cursor) return sorted(out.values(), key=lambda t: t.name) def collect_variables(tu: TranslationUnit, ns_filter: Optional[str]) -> List[VarInfo]: """Collect extern variable declarations from the translation unit.""" want_ns = ns_filter.split("::") if ns_filter else None out: dict[str, VarInfo] = {} def visit(node: Cursor) -> None: if node.kind == CursorKind.VAR_DECL: if node.storage_class != StorageClass.EXTERN or node.is_definition(): return ns_parts = _cursor_namespace(node) if want_ns is not None and ns_parts != want_ns: return name = node.spelling if not name: return out[name] = VarInfo( qualified_ns="::".join(ns_parts), name=name, ) if node.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE): for c in node.get_children(): visit(c) if tu.cursor is not None: visit(tu.cursor) return sorted(out.values(), key=lambda v: v.name) def emit_cc_thunk32(f: FuncInfo | TypedefInfo, lines: List[str]): if isinstance(f, TypedefInfo): # Host-to-guest call_target = "[eax+4]" align = 0 host_to_guest = True elif isinstance(f, FuncInfo): # Guest-to-host call_target = f.mangled align = 16 host_to_guest = False if f.variadic: # Variadic functions are not yet supported for calling convention conversion. assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, ( "Variadic functions must be cdecl" ) lines.append(f"\tjmp {call_target}") return source_layout = compute_arg_layout( f.args, f.source_cc, Arch.X86, stack_offset=4, skip_args=1 if host_to_guest else 0, ) target_layout = compute_arg_layout(f.args, f.target_cc, Arch.X86) # Get current TEB if host_to_guest: lines.append("\tGET_TEB_HOST ecx") else: lines.append("\tmov ecx, fs:[TEB_SELF]") # Swap fs and gs lines.append("\tmov ax, fs") lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]") lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax") lines.append("\tmov fs, dx") lines.append("\tmov ax, gs") lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]") lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax") lines.append("\tmov gs, dx") # Store guest stack pointer in eax for arg access if len(f.args) > 0 or host_to_guest: lines.append("\tmov eax, esp") # Swap stack pointer lines.append("\tpush ebp") lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]") lines.append("\tmov dword ptr [ecx+TEB_SP], esp") lines.append("\tmov esp, ebp") # Allocate stack space for arguments if target_layout.stack_size > 0: lines.append(f"\tsub esp, {target_layout.stack_size}") # Align stack if needed (must be done after allocating args) if align > 0: lines.append(f"\tand esp, ~{align - 1}") # Copy args onto stack for the callee for idx, target in enumerate(target_layout.args): if target.stack_offset is None: continue source = source_layout.args[idx] if source.stack_offset is None: raise NotImplementedError( f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented" ) if source.slot_size != target.slot_size: raise NotImplementedError( f"Argument {idx} requires size conversion {source.slot_size}->{target.slot_size}; not implemented" ) for off in range(0, target.slot_size, 4): lines.append(f"\tmov ecx, [eax+{source.stack_offset + off}]") lines.append(f"\tmov [esp+{target.stack_offset + off}], ecx") # Load args into registers as needed for idx, target in enumerate(target_layout.args): if target.register is None: continue source = source_layout.args[idx] if source.stack_offset is None: raise NotImplementedError( f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented" ) lines.append(f"\tmov {target.register}, [eax+{source.stack_offset}]") # Call into target lines.append(f"\tcall {call_target}") # Determine if we can clobber eax/edx if f.return_type.arg_class != ArgClass.INT: raise NotImplementedError( f"Unsupported return type class {f.return_type.arg_class.value} for function {f.name}" ) return_size = f.return_type.type.get_size() save_eax = return_size > 0 save_edx = return_size > 4 if return_size > 8: raise NotImplementedError( f"Return size {return_size} not supported for function {f.name}" ) # Restore segment registers if save_eax: lines.append("\tpush eax") if save_edx: lines.append("\tpush edx") if host_to_guest: lines.append("\tmov ecx, fs:[TEB_SELF]") else: lines.append("\tGET_TEB_HOST ecx") lines.append("\tmov ax, fs") lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]") lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax") lines.append("\tmov fs, dx") lines.append("\tmov ax, gs") lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]") lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax") lines.append("\tmov gs, dx") if save_edx: lines.append("\tpop edx") if save_eax: lines.append("\tpop eax") # Swap stack pointer lines.append("\tmov esp, ebp") # Clean up arg space lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]") lines.append("\tmov dword ptr [ecx+TEB_SP], esp") # Restore stack and frame pointer lines.append("\tleave") # Return to guest if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0: lines.append(f"\tret {source_layout.stack_size}") else: lines.append("\tret") def _x64_register_by_slot_size(reg: str, slot_size: int) -> str: if slot_size == 8: return reg if reg in ["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp"]: if slot_size == 4: return f"e{reg[1:]}" elif slot_size == 2: return reg[1:] elif slot_size == 1: if reg in ["rax", "rbx", "rcx", "rdx"]: return f"{reg[1]}l" elif reg in ["rsi", "rdi"]: return f"{reg[1]}il" else: return f"{reg[1]}pl" if slot_size == 4: return f"{reg}d" if slot_size == 2: return f"{reg}w" if slot_size == 1: return f"{reg}b" raise NotImplementedError(f"Unsupported register {reg} for slot size {slot_size}") def _x64_ptr_type_by_slot_size(slot_size) -> str: if slot_size == 4: return "dword ptr" elif slot_size == 8: return "qword ptr" else: raise ValueError(f"Unsupported slot size {slot_size}") def emit_cc_thunk64(f: FuncInfo | TypedefInfo, lines: List[str]): if isinstance(f, TypedefInfo): # Host-to-guest call_target = "edi" align = 0 host_to_guest = True elif isinstance(f, FuncInfo): # Guest-to-host call_target = f.mangled align = 16 host_to_guest = False if f.variadic: # Variadic functions are not yet supported for calling convention conversion. assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, ( "Variadic functions must be cdecl" ) lines.append(f"\tjmp {call_target}") return source_layout = compute_arg_layout( f.args, f.source_cc, Arch.X86_64 if host_to_guest else Arch.X86, stack_offset=24 if host_to_guest else 20, skip_args=1 if host_to_guest else 0, ) target_layout = compute_arg_layout( f.args, f.target_cc, Arch.X86 if host_to_guest else Arch.X86_64 ) if host_to_guest: lines.append(".code64") # Save rbx and rbp lines.append("\tpush rbp") lines.append("\tpush rbx") # Stash host stack in r10 lines.append("\tmov r10, rsp") # Get current TEB lines.append("\tGET_TEB_HOST rbx") if sys.platform != "darwin": # Save FS base lines.append("\trdfsbase r9") lines.append("\tmov qword ptr [rbx+TEB_FSBASE], r9") # Save RSP and load guest stack lines.append("\tmov rbp, qword ptr [rbx+TEB_SP]") lines.append("\tmov qword ptr [rbx+TEB_SP], rsp") lines.append("\tmov rsp, rbp") # Allocate stack space for arguments if target_layout.stack_size > 0: lines.append(f"\tsub rsp, {target_layout.stack_size}") # Align stack if needed (must be done after allocating args) if align > 0: lines.append(f"\tand rsp, ~{align - 1}") # Transfer arguments for i, target in enumerate(target_layout.args): if target.stack_offset is None: raise NotImplementedError(f"Unexpected register argument {target}") source = source_layout.args[i] if source.stack_offset is not None: ptr_type = _x64_ptr_type_by_slot_size(source.slot_size) register = _x64_register_by_slot_size("rax", target.slot_size) lines.append( f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]" ) ptr_type = _x64_ptr_type_by_slot_size(target.slot_size) register = _x64_register_by_slot_size("rax", target.slot_size) elif source.register is not None: ptr_type = _x64_ptr_type_by_slot_size(target.slot_size) register = _x64_register_by_slot_size(source.register, target.slot_size) else: raise ValueError(f"Argument {i} is not a register or stack offset") lines.append(f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}") # Jump to 32-bit mode lines.append("\tLJMP32 rbx") # Setup FS selector lines.append("\tmov ax, word ptr [ebx+TEB_FS_SEL]") lines.append("\tmov fs, ax") # Call into target lines.append(f"\tcall {call_target}") # Get current TEB (32-bit code may clobber ebx) lines.append("\tmov ebx, fs:[TEB_SELF]") # Jump back to 64-bit lines.append("\tLJMP64 ebx") # Sign extend return value if necessary if f.return_type.sign_extended: lines.append("\tcdqe") if sys.platform != "darwin": # Restore FS base lines.append("\tmov r9, qword ptr [rbx+TEB_FSBASE]") lines.append("\twrfsbase r9") # Restore host stack lines.append("\tmov rsp, qword ptr [rbx+TEB_SP]") lines.append("\tmov qword ptr [rbx+TEB_SP], rbp") # Restore rbp, rbx and return lines.append("\tpop rbx") lines.append("\tpop rbp") lines.append("\tret") else: lines.append(".code32") # Save registers lines.append("\tpush ebp") lines.append("\tpush esi") lines.append("\tpush edi") lines.append("\tpush ebx") # Get current TEB lines.append("\tmov ebx, fs:[TEB_SELF]") if sys.platform != "darwin": # Save fs segment lines.append("\tmov di, fs") lines.append("\tmov word ptr [ebx+TEB_FS_SEL], di") # Jump back to 64-bit lines.append("\tLJMP64 ebx") if sys.platform != "darwin": # Restore FS base lines.append("\tmov r9, qword ptr [rbx+TEB_FSBASE]") lines.append("\twrfsbase r9") # Stash guest stack in r10 lines.append("\tmov r10, rsp") # Restore host stack lines.append("\tmov rbp, qword ptr [rbx+TEB_SP]") lines.append("\tmov qword ptr [rbx+TEB_SP], rsp") lines.append("\tmov rsp, rbp") # Allocate stack space for arguments if target_layout.stack_size > 0: lines.append(f"\tsub rsp, {target_layout.stack_size}") # Align stack if needed (must be done after allocating args) if align > 0: lines.append(f"\tand rsp, ~{align - 1}") # Transfer args for i, target in enumerate(target_layout.args): arg = f.args[i] source = source_layout.args[i] if target.stack_offset is not None: if source.stack_offset is not None: ptr_type = _x64_ptr_type_by_slot_size(source.slot_size) register = _x64_register_by_slot_size("rax", source.slot_size) lines.append( f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]" ) ptr_type = _x64_ptr_type_by_slot_size(target.slot_size) register = _x64_register_by_slot_size("rax", target.slot_size) elif source.register is not None: ptr_type = _x64_ptr_type_by_slot_size(target.slot_size) register = _x64_register_by_slot_size( source.register, target.slot_size ) else: raise ValueError(f"Argument {i} is not a register or stack offset") lines.append( f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}" ) elif target.register is not None: ptr_type = _x64_ptr_type_by_slot_size(source.slot_size) if source.slot_size == 4 and target.slot_size == 8: if arg.sign_extended: register = _x64_register_by_slot_size( target.register, source.slot_size ) lines.append( f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]" ) lines.append(f"\tmovsxd {target.register}, {register}") else: register = _x64_register_by_slot_size( target.register, source.slot_size ) lines.append( f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]" ) elif source.slot_size == 8 and target.slot_size == 8: lines.append( f"\tmov {target.register}, {ptr_type} [r10+{source.stack_offset}]" ) else: raise NotImplementedError( f"Unsupported conversion from {source.slot_size} to {target.slot_size}" ) # Call into target lines.append(f"\tcall {call_target}") # Restore host stack lines.append("\tmov rsp, qword ptr [rbx+TEB_SP]") lines.append("\tmov qword ptr [rbx+TEB_SP], rbp") # Jump to 32-bit mode lines.append("\tLJMP32 rbx") if sys.platform != "darwin": # Setup FS selector lines.append("\tmov di, word ptr [ebx+TEB_FS_SEL]") lines.append("\tmov fs, di") # Restore registers lines.append("\tpop ebx") lines.append("\tpop edi") lines.append("\tpop esi") lines.append("\tpop ebp") # Return to guest if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0: lines.append(f"\tret {source_layout.stack_size}") else: lines.append("\tret") def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str], arch: Arch): if arch == Arch.X86_64: return emit_cc_thunk64(f, lines) elif arch == Arch.X86: return emit_cc_thunk32(f, lines) def emit_guest_to_host_thunks( lines: List[str], dll: str, funcs: Iterable[FuncInfo], arch: Arch ) -> None: for f in funcs: thunk = f"thunk_{dll}_{f.name}" lines.append("") lines.append( f"# {f.qualified_ns}::{f.name} (source_cc={f.source_cc.name}, target_cc={f.target_cc.name}, variadic={f.variadic})" ) source_layout = compute_arg_layout(f.args, f.source_cc, Arch.X86) target_layout = compute_arg_layout(f.args, f.target_cc, arch) for i, arg in enumerate(f.args): details: List[str] = [] details.append(f"src={describe_arg_placement(source_layout.args[i])}") details.append(f"dst={describe_arg_placement(target_layout.args[i])}") details.append(f"class={arg.arg_class.value}") details.append(f"sign_extended={arg.sign_extended}") lines.append(f"\t# Arg {i} ({', '.join(details)})") lines.append(f"ASM_GLOBAL({thunk}, @function)") emit_cc_thunk(f, lines, arch) lines.append(f"ASM_END({thunk})") def emit_host_to_guest_thunks( lines: List[str], typedefs: Iterable[TypedefInfo], arch: Arch ) -> None: for f in typedefs: thunk = f"call_{f.name}" lines.append("") lines.append( f"# {f.name} (target_cc={f.target_cc.name}, variadic={f.variadic})" ) source_layout = compute_arg_layout(f.args, f.source_cc, arch, skip_args=1) target_layout = compute_arg_layout(f.args, f.target_cc, Arch.X86) for i, arg in enumerate(f.args): details: List[str] = [] details.append(f"src={describe_arg_placement(source_layout.args[i])}") details.append(f"dst={describe_arg_placement(target_layout.args[i])}") details.append(f"class={arg.arg_class.value}") details.append(f"sign_extended={arg.sign_extended}") lines.append(f"\t# Arg {i} ({', '.join(details)})") # details = [] # details.append(f"class={f.return_type.arg_class.value}") # details.append(f"sign_extended={f.return_type.sign_extended}") # lines.append(f"\t# Ret ({', '.join(details)})") lines.append(f"ASM_WEAK({thunk}, @function)") emit_cc_thunk(f, lines, arch) lines.append(f"ASM_END({thunk})") def emit_header_mapping( dll: str, funcs: Iterable[FuncInfo], typedefs: Iterable[TypedefInfo], variables: Iterable[VarInfo], arch: Arch, ) -> str: guard = f"WIBO_GEN_{dll.upper()}_THUNKS_H" lines: List[str] = [] lines.append("/* Auto-generated; DO NOT EDIT. */") lines.append(f"#ifndef {guard}") lines.append(f"#define {guard}") lines.append("#include ") lines.append("#include ") lines.append('#ifdef __cplusplus\nextern "C" {\n#endif') # Guest-to-host thunk functions for f in funcs: # Generate best-effort function prototype so that simple thunks can be called directly # in special cases (e.g. thunk_entry_stubBase) def _is_opaque(t: Type) -> bool: if ( t.kind == TypeKind.RECORD or t.kind == TypeKind.ENUM or t.kind == TypeKind.FUNCTIONPROTO or t.kind == TypeKind.FUNCTIONNOPROTO ): return True return t.kind == TypeKind.POINTER and _is_opaque( t.get_pointee().get_canonical() ) def _canonical_type_str(t: Type) -> str: c = t.get_canonical() if _is_opaque(c): return "void *" return c.spelling thunk = f"thunk_{dll}_{f.name}" args = [] for i, arg in enumerate(f.args): type_str = _canonical_type_str(arg.type) args.append(f"{type_str} arg{i}") param_list = ", ".join(args) return_type = _canonical_type_str(f.return_type.type) if arch == Arch.X86_64: cc_attr = "" elif f.source_cc == CallingConv.X86_STDCALL: cc_attr = "__attribute__((stdcall)) " elif f.source_cc == CallingConv.C: cc_attr = "__attribute__((cdecl)) " else: raise NotImplementedError( f"Unsupported calling convention {f.source_cc.name} for function {f.name}" ) lines.append(f"{cc_attr}{return_type} {thunk}({param_list});") # Host-to-guest thunk functions for td in typedefs: thunk = f"call_{td.name}" if td.variadic: continue params = [f"{td.name} fn"] for i, arg in enumerate(td.args): type_str = _type_to_string(arg.type) params.append(f"{type_str} arg{i}") param_list = ", ".join(params) return_type = _type_to_string(td.return_type.type) lines.append(f"{return_type} {thunk}({param_list});") lines.append("#ifdef __cplusplus\n}\n#endif") lines.append("") # name->address helper for resolveByName lines.append("static inline void *%sThunkByName(const char *name) {" % dll) for f in funcs: lines.append( f'\tif (strcmp(name, "{f.name}") == 0) return (void*)&thunk_{dll}_{f.name};' ) for v in variables: qualified = f"{v.qualified_ns}::{v.name}" if v.qualified_ns else v.name lines.append( f'\tif (strcmp(name, "{v.name}") == 0) return (void*)&{qualified};' ) lines.append("\treturn NULL;") lines.append("}") lines.append(f"#endif /* {guard} */\n") return "\n".join(lines) def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--dll", required=True, help="DLL name, e.g. kernel32") ap.add_argument("--headers", nargs="+", required=True, help="Header files to scan") ap.add_argument( "--namespace", dest="ns", default=None, help="Namespace filter, e.g. kernel32" ) ap.add_argument("--arch", choices=["x86", "x86_64"], default="x86") ap.add_argument( "--out-asm", type=Path, required=True, help="Output assembly file (.S)" ) ap.add_argument( "--out-hdr", type=Path, required=True, help="Output header file (.h)" ) ap.add_argument("-I", dest="incs", action="append", default=[]) args = ap.parse_args() if args.arch == "x86": arch = Arch.X86 target = "i686-linux-gnu" elif args.arch == "x86_64": arch = Arch.X86_64 if sys.platform == "darwin": target = "x86_64-apple-darwin" else: target = "x86_64-linux-gnu" else: raise ValueError(f"Unsupported architecture: {args.arch}") tu = parse_tu(args.headers, args.incs, target) funcs = collect_functions(tu, args.ns, arch) typedefs = collect_typedefs(tu, arch) variables = collect_variables(tu, args.ns) if not funcs and not typedefs and not variables: sys.stderr.write("No functions, typedefs, or variables found for generation.\n") return 1 lines: List[str] = [] lines.append("# Auto-generated thunks; DO NOT EDIT.") lines.append('#include "macros.S"') lines.append(".text") emit_guest_to_host_thunks(lines, args.dll, funcs, arch) emit_host_to_guest_thunks(lines, typedefs, arch) asm = "\n".join(lines) + "\n" hdr = emit_header_mapping(args.dll, funcs, typedefs, variables, arch) args.out_asm.parent.mkdir(parents=True, exist_ok=True) args.out_hdr.parent.mkdir(parents=True, exist_ok=True) args.out_asm.write_text(asm) args.out_hdr.write_text(hdr) return 0 if __name__ == "__main__": raise SystemExit(main())