mirror of
https://github.com/decompals/wibo.git
synced 2025-12-11 14:41:58 +00:00
1121 lines
36 KiB
Python
1121 lines
36 KiB
Python
#!/usr/bin/env -S uv run --script
|
|
# /// script
|
|
# requires-python = ">=3.10"
|
|
# dependencies = ["clang==17.0.6"]
|
|
# ///
|
|
"""
|
|
Generate Windows ABI trampolines by scanning C++ prototypes using libclang.
|
|
|
|
This emits x86 trampolines for guest<->host calls.
|
|
"""
|
|
|
|
# When executed as a script, bootstrap the virtual environment first. This
# guard is placed ahead of the remaining imports, so it runs before the
# third-party `clang` import further down in this file.
# NOTE(review): `script_venv` is a project-local helper that is not visible
# here; presumably it prepares (or re-executes inside) a venv -- confirm.
if __name__ == "__main__":
    import script_venv

    script_venv.bootstrap_venv(__file__)
|
|
|
|
import argparse
|
|
import ctypes
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum, IntEnum
|
|
from pathlib import Path
|
|
from typing import Iterable, List, Optional
|
|
|
|
from clang.cindex import (
|
|
Config,
|
|
Cursor,
|
|
CursorKind,
|
|
Index,
|
|
StorageClass,
|
|
TranslationUnit,
|
|
Type,
|
|
TypeKind,
|
|
conf,
|
|
)
|
|
from clang.cindex import Type as CXType
|
|
|
|
# Allow libclang path to be specified via environment variable.
# LIBCLANG_PATH may name either the library file itself or a directory that
# contains it; any other value only produces a warning on stderr and the
# clang.cindex configuration is left untouched.
if "LIBCLANG_PATH" in os.environ:
    libclang_path = os.environ["LIBCLANG_PATH"]
    if os.path.isfile(libclang_path):
        # Points directly at the library file
        Config.set_library_file(libclang_path)
    elif os.path.isdir(libclang_path):
        # Points at a directory to search for the library
        Config.set_library_path(libclang_path)
    else:
        sys.stderr.write(
            f"Warning: LIBCLANG_PATH={libclang_path} is not a file or directory\n"
        )
|
|
|
|
|
|
class Arch(str, Enum):
    """Architectures for which argument layouts and thunks can be generated.

    The member values match the strings accepted by the ``--arch``
    command-line option.
    """

    X86 = "x86"
    X86_64 = "x86_64"
|
|
|
|
|
|
class CallingConv(IntEnum):
    """CXCallingConv enum values from clang-c/Index.h"""

    # The integers below are converted directly from the value returned by
    # clang_getFunctionTypeCallingConv, so they must not be renumbered.
    DEFAULT = 0
    C = 1
    X86_STDCALL = 2
    X86_FASTCALL = 3
    X86_THISCALL = 4
    X86_PASCAL = 5
    AAPCS = 6
    AAPCS_VFP = 7
    X86_REGCALL = 8
    INTELOCLBICC = 9
    WIN64 = 10
    X86_64_WIN64 = 11
    X86_64_SYSV = 12
    X86_VECTORCALL = 13
    SWIFT = 14
    PRESERVEMOST = 15
    PRESERVEALL = 16
    AARCH64_VECTORCALL = 17
    SWIFTASYNC = 18
    AARCH64_SVEPCS = 19
    M68K_RTD = 20
    INVALID = 100
    UNEXPOSED = 200
|
|
|
|
|
|
# Register the clang_getFunctionTypeCallingConv function.
# The ctypes signature is declared explicitly here: a single CXType argument
# and a C int result (wrapped into CallingConv by _get_function_calling_conv).
_get_calling_conv = conf.lib.clang_getFunctionTypeCallingConv
_get_calling_conv.argtypes = [CXType]
_get_calling_conv.restype = ctypes.c_int
|
|
|
|
|
|
def _get_function_calling_conv(func_type: CXType) -> CallingConv:
    """Return the calling convention libclang reports for ``func_type``.

    The raw integer produced by ``clang_getFunctionTypeCallingConv`` is
    wrapped in :class:`CallingConv`; an integer with no matching member
    raises ``ValueError`` from the enum lookup.
    """
    raw_value = _get_calling_conv(func_type)
    return CallingConv(raw_value)
|
|
|
|
|
|
class ArgClass(str, Enum):
    """Classification of how a value is passed (integer-like or in memory)."""

    INT = "int"
    MEMORY = "memory"
|
|
|
|
|
|
@dataclass
class ArgInfo:
    """Per-argument (or return value) information derived from a clang type."""

    type: Type  # the clang type as written (not canonicalized)
    arg_class: ArgClass  # passing class assigned by _calculate_arg_info
    sign_extended: bool  # whether the value is sign-extended when widened
|
|
|
|
|
|
@dataclass
class ArgPlacement:
    """Location of a single argument under one calling convention and arch.

    The hand-written ``__init__`` below is the constructor in effect (the
    dataclass decorator does not replace an ``__init__`` defined in the class
    body). It only fills in the sizes; ``register`` and ``stack_offset`` both
    start as ``None`` and one of them is assigned afterwards by the code that
    builds the layout.
    """

    size: int  # size reported by the canonical type's get_size()
    slot_size: int  # bytes used to pass the argument, per _slot_size_for_arch
    stack_offset: Optional[int] = None  # byte offset when passed on the stack
    register: Optional[str] = None  # register name when passed in a register

    def __init__(self, arg: ArgInfo, arch: Arch):
        self.size = arg.type.get_canonical().get_size()
        self.slot_size = _slot_size_for_arch(arg, arch)
        self.register = None
        self.stack_offset = None
|
|
|
|
|
|
@dataclass
class FuncInfo:
    """A function declaration selected for guest-to-host thunk generation."""

    qualified_ns: str  # enclosing namespace names joined with "::"
    name: str  # unqualified spelling of the declaration
    mangled: str  # mangled symbol name (falls back to ``name`` when empty)
    source_cc: CallingConv  # convention taken from the "CC:*" annotation
    target_cc: CallingConv  # convention libclang reports for the function type
    variadic: bool  # True when the function type is variadic
    return_type: ArgInfo
    args: List[ArgInfo] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class TypedefInfo:
    """A function-pointer typedef selected for host-to-guest thunk generation."""

    name: str  # spelling of the typedef
    source_cc: CallingConv  # convention libclang reports for the pointee type
    target_cc: CallingConv  # convention taken from the "CC:*" annotation
    variadic: bool  # True when the pointee function type is variadic
    return_type: ArgInfo
    args: List[ArgInfo] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class VarInfo:
    """An extern variable declaration exposed through the name lookup helper."""

    qualified_ns: str  # enclosing namespace names joined with "::" (may be "")
    name: str  # unqualified spelling of the variable
|
|
|
|
|
|
def parse_tu(
    headers: List[str], include_dirs: List[str], target: str
) -> TranslationUnit:
    """Parse the given headers as a single C++17 translation unit.

    A temporary ``.cpp`` file containing one ``#include`` per header is
    written and handed to libclang with function bodies skipped. Diagnostics
    of warning severity or higher are echoed to stderr; parsing problems do
    not raise here.

    Args:
        headers: Header names, each emitted as ``#include "<name>"``.
        include_dirs: Directories passed to clang as ``-I`` options.
        target: Target triple passed via ``-target``.

    Returns:
        The parsed translation unit.
    """
    include_lines = [f'#include "{h}"' for h in headers]
    tu_source = "\n".join(include_lines) + "\n"

    clang_args = [
        "-x",
        "c++",
        "-std=c++17",
        "-target",
        target,
        "-DWIBO_CODEGEN=1",
    ]
    for inc in include_dirs:
        clang_args.extend(("-I", inc))

    # The temporary file must stay alive while libclang reads it, so parsing
    # happens inside the context manager.
    with tempfile.NamedTemporaryFile("w", suffix=".cpp") as stub:
        stub.write(tu_source)
        stub.flush()

        tu = Index.create().parse(
            stub.name,
            args=clang_args,
            options=TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
        )
        for diag in tu.diagnostics:
            if diag.severity >= diag.Warning:
                sys.stderr.write(str(diag) + "\n")
        return tu
|
|
|
|
|
|
def _cursor_namespace(cursor: Cursor) -> List[str]:
    """Return the named namespaces enclosing ``cursor``, outermost first.

    The walk starts at ``cursor`` itself and follows ``semantic_parent`` links
    until the translation unit (or ``None``) is reached. Namespaces with an
    empty spelling are skipped.
    """
    parts: List[str] = []
    node = cursor
    while node is not None and node.kind != CursorKind.TRANSLATION_UNIT:
        if node.kind == CursorKind.NAMESPACE and node.spelling:
            # Walking inner-to-outer, so prepend to end up outermost-first.
            parts.insert(0, node.spelling)
        node = node.semantic_parent
    return parts
|
|
|
|
|
|
def _source_cc_from_annotations(func: Cursor) -> CallingConv:
    """Map a ``CC:*`` annotate attribute on ``func`` to a calling convention.

    The direct children of the cursor are scanned in order and the first
    recognized annotation wins. ``CallingConv.DEFAULT`` is returned when no
    recognized annotation is present.
    """
    annotation_to_cc = {
        "CC:fastcall": CallingConv.X86_FASTCALL,
        "CC:stdcall": CallingConv.X86_STDCALL,
        "CC:cdecl": CallingConv.C,
    }
    for child in func.get_children():
        if child.kind != CursorKind.ANNOTATE_ATTR:
            continue
        matched = annotation_to_cc.get(child.spelling)
        if matched is not None:
            return matched
    return CallingConv.DEFAULT
|
|
|
|
|
|
def _is_handle_typedef(arg_type: CXType) -> bool:
    """Check if a type is a HANDLE-like typedef (HWND, HINSTANCE, etc.).

    The chain of ELABORATED and TYPEDEF wrappers is peeled one layer at a
    time. The type counts as HANDLE-like as soon as a typedef in that chain
    has an all-uppercase name starting with ``H``.
    """
    current = arg_type
    while True:
        if current.kind == TypeKind.TYPEDEF:
            decl = current.get_declaration()
            alias = decl.spelling
            # Windows HANDLE types conventionally start with 'H'
            if alias and alias.startswith("H") and alias.isupper():
                return True
            current = decl.underlying_typedef_type
        elif current.kind == TypeKind.ELABORATED:
            named = current.get_named_type()
            if named is None:
                return False
            current = named
        else:
            # Neither a typedef nor an elaborated wrapper: nothing left to peel
            return False
|
|
|
|
|
|
# Canonical type kinds treated as signed integers; _calculate_arg_info marks
# arguments of these kinds as sign-extended.
SIGNED_KINDS = [
    TypeKind.SCHAR,
    TypeKind.CHAR_S,
    TypeKind.SHORT,
    TypeKind.INT,
    TypeKind.LONG,
    TypeKind.LONGLONG,
    TypeKind.INT128,
]
|
|
|
|
|
|
def _calculate_arg_info(t: Type) -> ArgInfo:
    """Build the :class:`ArgInfo` describing how a value of type ``t`` is passed.

    Every type is currently classified as ``ArgClass.INT``. A pointer-to-pointer
    type triggers a "Bugprone" notice on stdout. The value is flagged as
    sign-extended when its canonical kind is in ``SIGNED_KINDS`` or when the
    type is a HANDLE-like typedef.
    """
    canonical = t.get_canonical()

    # Record types would be classified as ArgClass.MEMORY here; that branch is
    # disabled, so everything is passed as INT:
    # if canonical.kind == TypeKind.RECORD:
    #     arg_class = ArgClass.MEMORY

    if (
        canonical.kind == TypeKind.POINTER
        and canonical.get_pointee().kind == TypeKind.POINTER
    ):
        print(f"Bugprone: Pointer to pointer ({_type_to_string(t)})")

    # Sign-extend signed integers and HANDLE-like typedefs
    needs_sign_extension = canonical.kind in SIGNED_KINDS or _is_handle_typedef(t)

    return ArgInfo(
        type=t,
        arg_class=ArgClass.INT,
        sign_extended=needs_sign_extension,
    )
|
|
|
|
|
|
def _collect_args(func_type: CXType) -> List[ArgInfo]:
    """Return an :class:`ArgInfo` for each parameter type of ``func_type``, in order."""
    return [_calculate_arg_info(param) for param in func_type.argument_types()]
|
|
|
|
|
|
def _slot_size_for_arch(arg: ArgInfo, arch: Arch) -> int:
    """Return the slot size (in bytes) used to pass an argument on ``arch``.

    Pointers take 8 bytes on x86_64 and 4 bytes otherwise. Other types are
    rounded up to the smallest slot the architecture offers (4 or 8 on x86,
    8 on x86_64).

    Raises:
        NotImplementedError: If the type does not fit in any available slot.
    """
    canonical = arg.type.get_canonical()
    if canonical.kind == TypeKind.POINTER:
        if arch == Arch.X86_64:
            return 8
        return 4

    size = canonical.get_size()
    if arch == Arch.X86:
        candidate_slots = (4, 8)
    elif arch == Arch.X86_64:
        candidate_slots = (8,)
    else:
        candidate_slots = ()

    for slot in candidate_slots:
        if size <= slot:
            return slot

    raise NotImplementedError(
        f"Argument size {size} not supported for architecture {arch.value}"
    )
|
|
|
|
|
|
@dataclass
class ArgLayout:
    """Result of compute_arg_layout: per-argument placements plus stack usage."""

    args: List[ArgPlacement]  # one placement per argument, in argument order
    stack_size: int  # total stack bytes, including slots of skipped leading args
|
|
|
|
|
|
def compute_arg_layout(
    args: List[ArgInfo],
    cc: CallingConv,
    arch: Arch,
    stack_offset: int = 0,
    skip_args: int = 0,
) -> ArgLayout:
    """Compute how each argument is passed for the given calling convention and arch.

    Args:
        args: Arguments to place, in declaration order.
        cc: Calling convention to lay the arguments out for.
        arch: Architecture the convention applies to.
        stack_offset: Byte offset assigned to the first stack slot.
        skip_args: Number of leading extra arguments that are not in ``args``
            but still occupy registers (or, once registers run out, one
            register-sized stack slot each).

    Returns:
        An :class:`ArgLayout` with one placement per entry of ``args`` and the
        total number of stack bytes used.
    """
    x86_fastcall = arch == Arch.X86 and cc == CallingConv.X86_FASTCALL

    # Registers available for integer arguments under this convention
    if x86_fastcall:
        gpr_order: List[str] = ["ecx", "edx"]
    elif arch == Arch.X86_64 and cc == CallingConv.C:
        gpr_order = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]
    else:
        gpr_order = []

    # Offset our stack based on number of extra arguments.
    # We assume that every arg represented by skip_args fits in a register.
    register_size = 8 if arch == Arch.X86_64 else 4
    consumed_stack = max(0, skip_args - len(gpr_order)) * register_size

    placements: List[ArgPlacement] = []
    next_stack_offset = stack_offset + consumed_stack
    stack_size = consumed_stack
    gpr_index = skip_args

    def place_on_stack(arg: ArgInfo) -> None:
        nonlocal next_stack_offset, stack_size
        slot = ArgPlacement(arg, arch)
        slot.stack_offset = next_stack_offset
        placements.append(slot)
        next_stack_offset += slot.slot_size
        stack_size += slot.slot_size

    def place_in_register(arg: ArgInfo) -> None:
        nonlocal gpr_index
        slot = ArgPlacement(arg, arch)
        slot.register = gpr_order[gpr_index]
        placements.append(slot)
        gpr_index += 1

    if x86_fastcall:
        # Special case for x86 fastcall: stop using registers if any spill
        # onto the stack.
        in_registers = 0
        for arg in args[: len(gpr_order)]:
            if gpr_index >= len(gpr_order):
                break
            slot_size = _slot_size_for_arch(arg, arch)
            if arg.arg_class != ArgClass.INT or slot_size != 4:
                break
            place_in_register(arg)
            in_registers += 1

        for arg in args[in_registers:]:
            place_on_stack(arg)
    else:
        for arg in args:
            slot_size = _slot_size_for_arch(arg, arch)
            goes_in_register = (
                arg.arg_class == ArgClass.INT
                and slot_size <= register_size
                and gpr_index < len(gpr_order)
            )
            if goes_in_register:
                place_in_register(arg)
            else:
                place_on_stack(arg)

    return ArgLayout(args=placements, stack_size=stack_size)
|
|
|
|
|
|
def describe_arg_placement(placement: ArgPlacement) -> str:
    """Render a placement as ``<register>[<slot>]`` or ``stack+<offset>[<slot>]``.

    A register assignment takes precedence over a stack offset.

    Raises:
        ValueError: If the placement has neither a register nor a stack offset.
    """
    slot = placement.slot_size
    register = placement.register
    offset = placement.stack_offset

    if register is None and offset is None:
        raise ValueError(f"Unassigned placement {placement}")
    if register is not None:
        return f"{register}[{slot}]"
    return f"stack+{offset}[{slot}]"
|
|
|
|
|
|
def collect_functions(
    tu: TranslationUnit, ns_filter: Optional[str], arch: Arch
) -> List[FuncInfo]:
    """Collect annotated function declarations from the translation unit.

    Only declarations reachable through the translation unit and namespace
    cursors are visited. A function is kept when it has a non-empty name, its
    enclosing namespaces equal ``ns_filter`` (when a filter is given), and it
    carries a recognized ``CC:*`` annotation. When several declarations share
    a name, the one visited last wins.

    Args:
        tu: Parsed translation unit.
        ns_filter: ``"::"``-separated namespace to restrict to, or ``None``.
        arch: Unused by this function.

    Returns:
        The collected functions sorted by name.
    """
    wanted_ns = ns_filter.split("::") if ns_filter else None
    found: dict[str, FuncInfo] = {}

    def record(decl: Cursor) -> None:
        namespace = _cursor_namespace(decl)
        if wanted_ns is not None and namespace != wanted_ns:
            return
        func_name = decl.spelling
        if not func_name:
            return
        annotated_cc = _source_cc_from_annotations(decl)
        if annotated_cc == CallingConv.DEFAULT:
            return  # No CC annotation; skip
        found[func_name] = FuncInfo(
            qualified_ns="::".join(namespace),
            name=func_name,
            mangled=decl.mangled_name or func_name,
            source_cc=annotated_cc,
            target_cc=_get_function_calling_conv(decl.type),
            variadic=decl.type.is_function_variadic(),
            return_type=_calculate_arg_info(decl.type.get_result()),
            args=_collect_args(decl.type),
        )

    def walk(cursor: Cursor) -> None:
        if cursor.kind == CursorKind.FUNCTION_DECL:
            record(cursor)
        elif cursor.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE):
            # Only the TU and namespaces are descended into
            for child in cursor.get_children():
                walk(child)

    root = tu.cursor
    if root is not None:
        walk(root)
    return sorted(found.values(), key=lambda info: info.name)
|
|
|
|
|
|
def _type_to_string(t: CXType) -> str:
    """Convert a CXType to a C type string.

    The ``struct``, ``union`` and ``enum`` tag keywords (each followed by a
    single space) are stripped from the type's spelling. The keywords are only
    removed where they appear as whole words, so identifiers that merely end
    in one of them (for example ``my_enum *``) are left intact.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import re

    # \b keeps the match from starting in the middle of an identifier.
    return re.sub(r"\b(?:struct|union|enum) ", "", t.spelling)
|
|
|
|
|
|
def collect_typedefs(tu: TranslationUnit, arch: Arch) -> List[TypedefInfo]:
    """Collect annotated function-pointer typedefs from the translation unit.

    Only ``TYPEDEF_DECL`` cursors reachable through the translation unit and
    namespace cursors are considered. A typedef is kept when its underlying
    type is a pointer to a prototyped function and it carries a recognized
    ``CC:*`` annotation. When several typedefs share a name, the one visited
    last wins.

    Args:
        tu: Parsed translation unit.
        arch: Unused by this function.

    Returns:
        The collected typedefs sorted by name.
    """
    found: dict[str, TypedefInfo] = {}

    def record(alias: str, decl: Cursor, proto: CXType) -> None:
        """Store ``alias`` if its declaration is annotated with a convention."""
        if not alias:
            return

        # Determine calling convention
        declared_cc = _get_function_calling_conv(proto)
        annotated_cc = _source_cc_from_annotations(decl)
        if annotated_cc == CallingConv.DEFAULT:
            return  # No CC annotation; skip

        found[alias] = TypedefInfo(
            name=alias,
            source_cc=declared_cc,
            target_cc=annotated_cc,
            variadic=proto.is_function_variadic(),
            return_type=_calculate_arg_info(proto.get_result()),
            args=_collect_args(proto),
        )

    def inspect_typedef(decl: Cursor) -> None:
        alias = decl.spelling
        if not alias:
            return
        underlying = decl.underlying_typedef_type
        if underlying.kind != TypeKind.POINTER:
            return
        pointee = underlying.get_pointee()
        if pointee.kind == TypeKind.FUNCTIONPROTO:
            record(alias, decl, pointee)

    def walk(cursor: Cursor) -> None:
        if cursor.kind == CursorKind.TYPEDEF_DECL:
            inspect_typedef(cursor)
        elif cursor.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE):
            # Only the TU and namespaces are descended into
            for child in cursor.get_children():
                walk(child)

    root = tu.cursor
    if root is not None:
        walk(root)
    return sorted(found.values(), key=lambda info: info.name)
|
|
|
|
|
|
def collect_variables(tu: TranslationUnit, ns_filter: Optional[str]) -> List[VarInfo]:
    """Collect extern variable declarations from the translation unit.

    A variable is kept when it is declared ``extern``, is not a definition,
    has a non-empty name, and (when ``ns_filter`` is given) its enclosing
    namespaces equal the filter. Only the translation unit and namespace
    cursors are descended into.

    Returns:
        The collected variables sorted by name.
    """
    wanted_ns = ns_filter.split("::") if ns_filter else None
    found: dict[str, VarInfo] = {}

    def record(decl: Cursor) -> None:
        is_extern_decl = (
            decl.storage_class == StorageClass.EXTERN and not decl.is_definition()
        )
        if not is_extern_decl:
            return
        namespace = _cursor_namespace(decl)
        if wanted_ns is not None and namespace != wanted_ns:
            return
        var_name = decl.spelling
        if not var_name:
            return
        found[var_name] = VarInfo(
            qualified_ns="::".join(namespace),
            name=var_name,
        )

    def walk(cursor: Cursor) -> None:
        if cursor.kind == CursorKind.VAR_DECL:
            record(cursor)
        elif cursor.kind in (CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE):
            for child in cursor.get_children():
                walk(child)

    root = tu.cursor
    if root is not None:
        walk(root)
    return sorted(found.values(), key=lambda info: info.name)
|
|
|
|
|
|
def emit_cc_thunk32(f: FuncInfo | TypedefInfo, lines: List[str]) -> None:
    """Append the 32-bit thunk body for ``f`` to ``lines``.

    A ``TypedefInfo`` produces a host-to-guest thunk that calls through
    ``[eax+4]``; a ``FuncInfo`` produces a guest-to-host thunk that calls
    ``f.mangled`` with the stack aligned to 16 bytes. Variadic functions get a
    bare ``jmp`` to the call target and nothing else.

    The emitted text refers to GET_TEB_HOST and the TEB_* symbols, which are
    not defined in this file.
    NOTE(review): presumably these come from the "macros.S" include written
    by main() -- confirm.

    Args:
        f: Function or typedef description to generate the thunk for.
        lines: Output list of assembly lines; modified in place.

    Raises:
        AssertionError: If a variadic ``f`` is not cdecl on both sides.
        NotImplementedError: If a source argument lives in a register, if a
            source/target slot size differs, or if the return value is not
            INT class or is larger than 8 bytes. The return-value checks run
            after instructions have already been appended to ``lines``.
    """
    if isinstance(f, TypedefInfo):
        # Host-to-guest
        call_target = "[eax+4]"
        align = 0
        host_to_guest = True
    elif isinstance(f, FuncInfo):
        # Guest-to-host
        call_target = f.mangled
        align = 16
        host_to_guest = False

    if f.variadic:
        # Variadic functions are not yet supported for calling convention conversion.
        assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, (
            "Variadic functions must be cdecl"
        )
        lines.append(f"\tjmp {call_target}")
        return

    # Source offsets are relative to the stack pointer captured in eax below;
    # offset 4 skips the return address, and skip_args=1 accounts for the
    # function pointer argument of host-to-guest calls.
    source_layout = compute_arg_layout(
        f.args,
        f.source_cc,
        Arch.X86,
        stack_offset=4,
        skip_args=1 if host_to_guest else 0,
    )
    target_layout = compute_arg_layout(f.args, f.target_cc, Arch.X86)

    # Get current TEB
    if host_to_guest:
        lines.append("\tGET_TEB_HOST ecx")
    else:
        lines.append("\tmov ecx, fs:[TEB_SELF]")

    # Swap fs and gs
    lines.append("\tmov ax, fs")
    lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]")
    lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax")
    lines.append("\tmov fs, dx")
    lines.append("\tmov ax, gs")
    lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]")
    lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax")
    lines.append("\tmov gs, dx")

    # Store guest stack pointer in eax for arg access
    # (host-to-guest always needs it because the call target is [eax+4])
    if len(f.args) > 0 or host_to_guest:
        lines.append("\tmov eax, esp")

    # Swap stack pointer
    lines.append("\tpush ebp")
    lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]")
    lines.append("\tmov dword ptr [ecx+TEB_SP], esp")
    lines.append("\tmov esp, ebp")

    # Allocate stack space for arguments
    if target_layout.stack_size > 0:
        lines.append(f"\tsub esp, {target_layout.stack_size}")

    # Align stack if needed (must be done after allocating args)
    if align > 0:
        lines.append(f"\tand esp, ~{align - 1}")

    # Copy args onto stack for the callee
    for idx, target in enumerate(target_layout.args):
        if target.stack_offset is None:
            continue

        source = source_layout.args[idx]
        if source.stack_offset is None:
            raise NotImplementedError(
                f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented"
            )

        if source.slot_size != target.slot_size:
            raise NotImplementedError(
                f"Argument {idx} requires size conversion {source.slot_size}->{target.slot_size}; not implemented"
            )

        # Copy the slot four bytes at a time, using ecx as scratch
        for off in range(0, target.slot_size, 4):
            lines.append(f"\tmov ecx, [eax+{source.stack_offset + off}]")
            lines.append(f"\tmov [esp+{target.stack_offset + off}], ecx")

    # Load args into registers as needed
    # (done after the stack copies, which use ecx as scratch)
    for idx, target in enumerate(target_layout.args):
        if target.register is None:
            continue

        source = source_layout.args[idx]
        if source.stack_offset is None:
            raise NotImplementedError(
                f"Source calling convention {f.source_cc.name} requires register argument {idx}; not implemented"
            )

        lines.append(f"\tmov {target.register}, [eax+{source.stack_offset}]")

    # Call into target
    lines.append(f"\tcall {call_target}")

    # Determine if we can clobber eax/edx
    if f.return_type.arg_class != ArgClass.INT:
        raise NotImplementedError(
            f"Unsupported return type class {f.return_type.arg_class.value} for function {f.name}"
        )
    return_size = f.return_type.type.get_size()
    save_eax = return_size > 0
    save_edx = return_size > 4
    if return_size > 8:
        raise NotImplementedError(
            f"Return size {return_size} not supported for function {f.name}"
        )

    # Restore segment registers
    # (the swap below goes through ax/dx, so the return value is saved first)
    if save_eax:
        lines.append("\tpush eax")
    if save_edx:
        lines.append("\tpush edx")
    if host_to_guest:
        lines.append("\tmov ecx, fs:[TEB_SELF]")
    else:
        lines.append("\tGET_TEB_HOST ecx")
    lines.append("\tmov ax, fs")
    lines.append("\tmov dx, word ptr [ecx+TEB_FS_SEL]")
    lines.append("\tmov word ptr [ecx+TEB_FS_SEL], ax")
    lines.append("\tmov fs, dx")
    lines.append("\tmov ax, gs")
    lines.append("\tmov dx, word ptr [ecx+TEB_GS_SEL]")
    lines.append("\tmov word ptr [ecx+TEB_GS_SEL], ax")
    lines.append("\tmov gs, dx")
    if save_edx:
        lines.append("\tpop edx")
    if save_eax:
        lines.append("\tpop eax")

    # Swap stack pointer
    lines.append("\tmov esp, ebp")  # Clean up arg space
    lines.append("\tmov ebp, dword ptr [ecx+TEB_SP]")
    lines.append("\tmov dword ptr [ecx+TEB_SP], esp")

    # Restore stack and frame pointer
    lines.append("\tleave")

    # Return to guest
    # (a stdcall source expects the thunk to pop its stack arguments)
    if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0:
        lines.append(f"\tret {source_layout.stack_size}")
    else:
        lines.append("\tret")
|
|
|
|
|
|
def _x64_register_by_slot_size(reg: str, slot_size: int) -> str:
|
|
if slot_size == 8:
|
|
return reg
|
|
if reg in ["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp"]:
|
|
if slot_size == 4:
|
|
return f"e{reg[1:]}"
|
|
elif slot_size == 2:
|
|
return reg[1:]
|
|
elif slot_size == 1:
|
|
if reg in ["rax", "rbx", "rcx", "rdx"]:
|
|
return f"{reg[1]}l"
|
|
elif reg in ["rsi", "rdi"]:
|
|
return f"{reg[1]}il"
|
|
else:
|
|
return f"{reg[1]}pl"
|
|
if slot_size == 4:
|
|
return f"{reg}d"
|
|
if slot_size == 2:
|
|
return f"{reg}w"
|
|
if slot_size == 1:
|
|
return f"{reg}b"
|
|
raise NotImplementedError(f"Unsupported register {reg} for slot size {slot_size}")
|
|
|
|
|
|
def _x64_ptr_type_by_slot_size(slot_size) -> str:
|
|
if slot_size == 4:
|
|
return "dword ptr"
|
|
elif slot_size == 8:
|
|
return "qword ptr"
|
|
else:
|
|
raise ValueError(f"Unsupported slot size {slot_size}")
|
|
|
|
|
|
def emit_cc_thunk64(f: FuncInfo | TypedefInfo, lines: List[str]):
    """Append the thunk body for ``f`` on a 64-bit host to ``lines``.

    A ``TypedefInfo`` produces a host-to-guest thunk: it starts in 64-bit
    code, copies the arguments onto the guest stack, switches to 32-bit mode
    and calls through ``edi``. A ``FuncInfo`` produces a guest-to-host thunk:
    it starts in 32-bit code, switches to 64-bit mode, moves the arguments
    into the host layout and calls ``f.mangled`` with a 16-byte aligned stack.
    Variadic functions get a bare ``jmp`` to the call target and nothing else.

    The emitted text refers to GET_TEB_HOST, LJMP32, LJMP64 and the TEB_*
    symbols, which are not defined in this file.
    NOTE(review): presumably these come from the "macros.S" include written
    by main() -- confirm.

    Args:
        f: Function or typedef description to generate the thunk for.
        lines: Output list of assembly lines; modified in place.

    Raises:
        AssertionError: If a variadic ``f`` is not cdecl on both sides.
        NotImplementedError: If a host-to-guest target argument would live in
            a register, or a guest-to-host register argument needs an
            unsupported size conversion.
        ValueError: If a source placement has neither a register nor a stack
            offset, or a slot size has no memory operand keyword.
    """
    if isinstance(f, TypedefInfo):
        # Host-to-guest
        call_target = "edi"
        align = 0
        host_to_guest = True
    elif isinstance(f, FuncInfo):
        # Guest-to-host
        call_target = f.mangled
        align = 16
        host_to_guest = False

    if f.variadic:
        # Variadic functions are not yet supported for calling convention conversion.
        assert f.source_cc == CallingConv.C and f.target_cc == CallingConv.C, (
            "Variadic functions must be cdecl"
        )
        lines.append(f"\tjmp {call_target}")
        return

    # Source stack offsets are relative to the stack pointer stashed in r10.
    # Host-to-guest: two 8-byte pushes plus the return address = 24.
    # Guest-to-host: four 4-byte pushes plus the return address = 20.
    source_layout = compute_arg_layout(
        f.args,
        f.source_cc,
        Arch.X86_64 if host_to_guest else Arch.X86,
        stack_offset=24 if host_to_guest else 20,
        skip_args=1 if host_to_guest else 0,
    )
    target_layout = compute_arg_layout(
        f.args, f.target_cc, Arch.X86 if host_to_guest else Arch.X86_64
    )

    if host_to_guest:
        lines.append(".code64")

        # Save rbx and rbp
        lines.append("\tpush rbp")
        lines.append("\tpush rbx")

        # Stash host stack in r10
        lines.append("\tmov r10, rsp")

        # Get current TEB
        lines.append("\tGET_TEB_HOST rbx")

        if sys.platform != "darwin":
            # Save FS base
            lines.append("\trdfsbase r9")
            lines.append("\tmov qword ptr [rbx+TEB_FSBASE], r9")

        # Save RSP and load guest stack
        lines.append("\tmov rbp, qword ptr [rbx+TEB_SP]")
        lines.append("\tmov qword ptr [rbx+TEB_SP], rsp")
        lines.append("\tmov rsp, rbp")

        # Allocate stack space for arguments
        if target_layout.stack_size > 0:
            lines.append(f"\tsub rsp, {target_layout.stack_size}")

        # Align stack if needed (must be done after allocating args)
        if align > 0:
            lines.append(f"\tand rsp, ~{align - 1}")

        # Transfer arguments
        for i, target in enumerate(target_layout.args):
            if target.stack_offset is None:
                raise NotImplementedError(f"Unexpected register argument {target}")

            source = source_layout.args[i]
            if source.stack_offset is not None:
                # Load with the register width that matches the source memory
                # operand; sizing the register by the target slot would pair
                # e.g. eax with a qword operand when narrowing 8 -> 4 bytes.
                ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                register = _x64_register_by_slot_size("rax", source.slot_size)
                lines.append(
                    f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                )
                # The store below then uses the target-sized view of rax.
                ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                register = _x64_register_by_slot_size("rax", target.slot_size)
            elif source.register is not None:
                ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                register = _x64_register_by_slot_size(source.register, target.slot_size)
            else:
                raise ValueError(f"Argument {i} is not a register or stack offset")
            lines.append(f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}")

        # Jump to 32-bit mode
        lines.append("\tLJMP32 rbx")

        # Setup FS selector
        lines.append("\tmov ax, word ptr [ebx+TEB_FS_SEL]")
        lines.append("\tmov fs, ax")

        # Call into target
        lines.append(f"\tcall {call_target}")

        # Get current TEB (32-bit code may clobber ebx)
        lines.append("\tmov ebx, fs:[TEB_SELF]")

        # Jump back to 64-bit
        lines.append("\tLJMP64 ebx")

        # Sign extend return value if necessary
        if f.return_type.sign_extended:
            lines.append("\tcdqe")

        if sys.platform != "darwin":
            # Restore FS base
            lines.append("\tmov r9, qword ptr [rbx+TEB_FSBASE]")
            lines.append("\twrfsbase r9")

        # Restore host stack
        lines.append("\tmov rsp, qword ptr [rbx+TEB_SP]")
        lines.append("\tmov qword ptr [rbx+TEB_SP], rbp")

        # Restore rbp, rbx and return
        lines.append("\tpop rbx")
        lines.append("\tpop rbp")
        lines.append("\tret")
    else:
        lines.append(".code32")

        # Save registers
        lines.append("\tpush ebp")
        lines.append("\tpush esi")
        lines.append("\tpush edi")
        lines.append("\tpush ebx")

        # Get current TEB
        lines.append("\tmov ebx, fs:[TEB_SELF]")

        if sys.platform != "darwin":
            # Save fs segment
            lines.append("\tmov di, fs")
            lines.append("\tmov word ptr [ebx+TEB_FS_SEL], di")

        # Jump back to 64-bit
        lines.append("\tLJMP64 ebx")

        if sys.platform != "darwin":
            # Restore FS base
            lines.append("\tmov r9, qword ptr [rbx+TEB_FSBASE]")
            lines.append("\twrfsbase r9")

        # Stash guest stack in r10
        lines.append("\tmov r10, rsp")

        # Restore host stack
        lines.append("\tmov rbp, qword ptr [rbx+TEB_SP]")
        lines.append("\tmov qword ptr [rbx+TEB_SP], rsp")
        lines.append("\tmov rsp, rbp")

        # Allocate stack space for arguments
        if target_layout.stack_size > 0:
            lines.append(f"\tsub rsp, {target_layout.stack_size}")

        # Align stack if needed (must be done after allocating args)
        if align > 0:
            lines.append(f"\tand rsp, ~{align - 1}")

        # Transfer args
        for i, target in enumerate(target_layout.args):
            arg = f.args[i]
            source = source_layout.args[i]

            if target.stack_offset is not None:
                # NOTE(review): stack-to-stack copies widen by zero extension
                # only; arg.sign_extended is not consulted here -- confirm
                # that this is intended.
                if source.stack_offset is not None:
                    ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                    register = _x64_register_by_slot_size("rax", source.slot_size)
                    lines.append(
                        f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                    )
                    ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                    register = _x64_register_by_slot_size("rax", target.slot_size)
                elif source.register is not None:
                    ptr_type = _x64_ptr_type_by_slot_size(target.slot_size)
                    register = _x64_register_by_slot_size(
                        source.register, target.slot_size
                    )
                else:
                    raise ValueError(f"Argument {i} is not a register or stack offset")
                lines.append(
                    f"\tmov {ptr_type} [rsp+{target.stack_offset}], {register}"
                )
            elif target.register is not None:
                ptr_type = _x64_ptr_type_by_slot_size(source.slot_size)
                if source.slot_size == 4 and target.slot_size == 8:
                    if arg.sign_extended:
                        # Load the 32-bit value, then widen it with sign extension
                        register = _x64_register_by_slot_size(
                            target.register, source.slot_size
                        )
                        lines.append(
                            f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                        )
                        lines.append(f"\tmovsxd {target.register}, {register}")
                    else:
                        # A 32-bit register write clears the upper half
                        register = _x64_register_by_slot_size(
                            target.register, source.slot_size
                        )
                        lines.append(
                            f"\tmov {register}, {ptr_type} [r10+{source.stack_offset}]"
                        )
                elif source.slot_size == 8 and target.slot_size == 8:
                    lines.append(
                        f"\tmov {target.register}, {ptr_type} [r10+{source.stack_offset}]"
                    )
                else:
                    raise NotImplementedError(
                        f"Unsupported conversion from {source.slot_size} to {target.slot_size}"
                    )

        # Call into target
        lines.append(f"\tcall {call_target}")

        # Restore host stack
        lines.append("\tmov rsp, qword ptr [rbx+TEB_SP]")
        lines.append("\tmov qword ptr [rbx+TEB_SP], rbp")

        # Jump to 32-bit mode
        lines.append("\tLJMP32 rbx")

        if sys.platform != "darwin":
            # Setup FS selector
            lines.append("\tmov di, word ptr [ebx+TEB_FS_SEL]")
            lines.append("\tmov fs, di")

        # Restore registers
        lines.append("\tpop ebx")
        lines.append("\tpop edi")
        lines.append("\tpop esi")
        lines.append("\tpop ebp")

        # Return to guest
        # (a stdcall source expects the thunk to pop its stack arguments)
        if f.source_cc == CallingConv.X86_STDCALL and source_layout.stack_size > 0:
            lines.append(f"\tret {source_layout.stack_size}")
        else:
            lines.append("\tret")
|
|
|
|
|
|
def emit_cc_thunk(f: FuncInfo | TypedefInfo, lines: List[str], arch: Arch):
    """Dispatch to the thunk emitter for ``arch``.

    Returns whatever the selected emitter returns; an architecture with no
    emitter appends nothing and yields ``None``.
    """
    if arch == Arch.X86:
        return emit_cc_thunk32(f, lines)
    if arch == Arch.X86_64:
        return emit_cc_thunk64(f, lines)
    return None
|
|
|
|
|
|
def emit_guest_to_host_thunks(
    lines: List[str], dll: str, funcs: Iterable[FuncInfo], arch: Arch
) -> None:
    """Append a ``thunk_<dll>_<name>`` guest-to-host thunk for every function.

    Each thunk is preceded by a blank line, a comment naming the function and
    its calling conventions, and one comment per argument showing its source
    and destination placement. ``lines`` is modified in place.
    """
    for func in funcs:
        symbol = f"thunk_{dll}_{func.name}"
        banner = (
            f"# {func.qualified_ns}::{func.name} "
            f"(source_cc={func.source_cc.name}, target_cc={func.target_cc.name}, "
            f"variadic={func.variadic})"
        )
        lines.extend(["", banner])

        # Layouts are computed here only to annotate the output
        src_layout = compute_arg_layout(func.args, func.source_cc, Arch.X86)
        dst_layout = compute_arg_layout(func.args, func.target_cc, arch)
        for idx, arg in enumerate(func.args):
            summary = ", ".join(
                (
                    f"src={describe_arg_placement(src_layout.args[idx])}",
                    f"dst={describe_arg_placement(dst_layout.args[idx])}",
                    f"class={arg.arg_class.value}",
                    f"sign_extended={arg.sign_extended}",
                )
            )
            lines.append(f"\t# Arg {idx} ({summary})")

        lines.append(f"ASM_GLOBAL({symbol}, @function)")
        emit_cc_thunk(func, lines, arch)
        lines.append(f"ASM_END({symbol})")
|
|
|
|
|
|
def emit_host_to_guest_thunks(
    lines: List[str], typedefs: Iterable[TypedefInfo], arch: Arch
) -> None:
    """Append a weak ``call_<name>`` host-to-guest thunk for every typedef.

    Each thunk is preceded by a blank line, a comment naming the typedef and
    its target calling convention, and one comment per argument showing its
    source and destination placement. ``lines`` is modified in place.
    """
    for td in typedefs:
        symbol = f"call_{td.name}"
        banner = f"# {td.name} (target_cc={td.target_cc.name}, variadic={td.variadic})"
        lines.extend(["", banner])

        # Layouts are computed here only to annotate the output; the source
        # side skips one leading argument.
        src_layout = compute_arg_layout(td.args, td.source_cc, arch, skip_args=1)
        dst_layout = compute_arg_layout(td.args, td.target_cc, Arch.X86)
        for idx, arg in enumerate(td.args):
            summary = ", ".join(
                (
                    f"src={describe_arg_placement(src_layout.args[idx])}",
                    f"dst={describe_arg_placement(dst_layout.args[idx])}",
                    f"class={arg.arg_class.value}",
                    f"sign_extended={arg.sign_extended}",
                )
            )
            lines.append(f"\t# Arg {idx} ({summary})")

        # Return-value annotation is currently disabled:
        # details = []
        # details.append(f"class={f.return_type.arg_class.value}")
        # details.append(f"sign_extended={f.return_type.sign_extended}")
        # lines.append(f"\t# Ret ({', '.join(details)})")

        lines.append(f"ASM_WEAK({symbol}, @function)")
        emit_cc_thunk(td, lines, arch)
        lines.append(f"ASM_END({symbol})")
|
|
|
|
|
|
def emit_header_mapping(
    dll: str,
    funcs: Iterable[FuncInfo],
    typedefs: Iterable[TypedefInfo],
    variables: Iterable[VarInfo],
    arch: Arch,
) -> str:
    """Build the generated C/C++ header for one DLL and return it as text.

    The header contains an include guard, ``extern "C"`` prototypes for every
    guest-to-host thunk and every non-variadic host-to-guest thunk, and a
    ``<dll>ThunkByName`` inline function that maps a name to the address of
    the matching thunk or variable.

    Args:
        dll: DLL name used in the guard, thunk names and lookup function.
        funcs: Functions that have guest-to-host thunks.
        typedefs: Function-pointer typedefs that have host-to-guest thunks.
        variables: Extern variables exposed through the lookup function.
        arch: Target architecture; on x86_64 no calling-convention attribute
            is emitted on the thunk prototypes.

    Returns:
        The complete header text.

    Raises:
        NotImplementedError: On x86, if a function's source calling
            convention is neither stdcall nor cdecl.
    """
    # funcs is walked twice (prototypes, then the lookup table). Materialize
    # it so a one-shot iterator does not leave the lookup table empty.
    funcs = list(funcs)

    def _is_opaque(t: Type) -> bool:
        """Return True for records, enums, functions, and pointers to them."""
        if t.kind in (
            TypeKind.RECORD,
            TypeKind.ENUM,
            TypeKind.FUNCTIONPROTO,
            TypeKind.FUNCTIONNOPROTO,
        ):
            return True
        return t.kind == TypeKind.POINTER and _is_opaque(
            t.get_pointee().get_canonical()
        )

    def _canonical_type_str(t: Type) -> str:
        """Spell the canonical type, collapsing opaque types to ``void *``."""
        c = t.get_canonical()
        if _is_opaque(c):
            return "void *"
        return c.spelling

    guard = f"WIBO_GEN_{dll.upper()}_THUNKS_H"
    lines: List[str] = [
        "/* Auto-generated; DO NOT EDIT. */",
        f"#ifndef {guard}",
        f"#define {guard}",
        "#include <stddef.h>",
        "#include <string.h>",
        '#ifdef __cplusplus\nextern "C" {\n#endif',
    ]

    # Guest-to-host thunk functions
    for f in funcs:
        # Generate best-effort function prototype so that simple thunks can be
        # called directly in special cases (e.g. thunk_entry_stubBase)
        thunk = f"thunk_{dll}_{f.name}"
        param_list = ", ".join(
            f"{_canonical_type_str(arg.type)} arg{i}" for i, arg in enumerate(f.args)
        )
        return_type = _canonical_type_str(f.return_type.type)
        if arch == Arch.X86_64:
            cc_attr = ""
        elif f.source_cc == CallingConv.X86_STDCALL:
            cc_attr = "__attribute__((stdcall)) "
        elif f.source_cc == CallingConv.C:
            cc_attr = "__attribute__((cdecl)) "
        else:
            raise NotImplementedError(
                f"Unsupported calling convention {f.source_cc.name} for function {f.name}"
            )
        lines.append(f"{cc_attr}{return_type} {thunk}({param_list});")

    # Host-to-guest thunk functions
    for td in typedefs:
        if td.variadic:
            continue
        thunk = f"call_{td.name}"

        # The function pointer itself is always the first parameter
        params = [f"{td.name} fn"]
        params.extend(
            f"{_type_to_string(arg.type)} arg{i}" for i, arg in enumerate(td.args)
        )
        param_list = ", ".join(params)
        return_type = _type_to_string(td.return_type.type)
        lines.append(f"{return_type} {thunk}({param_list});")

    lines.append("#ifdef __cplusplus\n}\n#endif")
    lines.append("")

    # name->address helper for resolveByName
    lines.append(f"static inline void *{dll}ThunkByName(const char *name) {{")
    for f in funcs:
        lines.append(
            f'\tif (strcmp(name, "{f.name}") == 0) return (void*)&thunk_{dll}_{f.name};'
        )
    for v in variables:
        qualified = f"{v.qualified_ns}::{v.name}" if v.qualified_ns else v.name
        lines.append(
            f'\tif (strcmp(name, "{v.name}") == 0) return (void*)&{qualified};'
        )
    lines.append("\treturn NULL;")
    lines.append("}")

    lines.append(f"#endif /* {guard} */\n")
    return "\n".join(lines)
|
|
|
|
|
|
def main() -> int:
    """Parse the command line, scan the headers, and write the thunk files.

    Returns:
        ``0`` after writing both output files, or ``1`` when the headers
        yielded no functions, typedefs, or variables (nothing is written in
        that case).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dll", required=True, help="DLL name, e.g. kernel32")
    parser.add_argument(
        "--headers", nargs="+", required=True, help="Header files to scan"
    )
    parser.add_argument(
        "--namespace", dest="ns", default=None, help="Namespace filter, e.g. kernel32"
    )
    parser.add_argument("--arch", choices=["x86", "x86_64"], default="x86")
    parser.add_argument(
        "--out-asm", type=Path, required=True, help="Output assembly file (.S)"
    )
    parser.add_argument(
        "--out-hdr", type=Path, required=True, help="Output header file (.h)"
    )
    parser.add_argument("-I", dest="incs", action="append", default=[])
    opts = parser.parse_args()

    # Pick the clang target triple that matches the requested architecture
    if opts.arch == "x86":
        arch, target = Arch.X86, "i686-linux-gnu"
    elif opts.arch == "x86_64":
        arch = Arch.X86_64
        target = (
            "x86_64-apple-darwin" if sys.platform == "darwin" else "x86_64-linux-gnu"
        )
    else:
        raise ValueError(f"Unsupported architecture: {opts.arch}")

    tu = parse_tu(opts.headers, opts.incs, target)
    funcs = collect_functions(tu, opts.ns, arch)
    typedefs = collect_typedefs(tu, arch)
    variables = collect_variables(tu, opts.ns)

    if not (funcs or typedefs or variables):
        sys.stderr.write("No functions, typedefs, or variables found for generation.\n")
        return 1

    asm_lines: List[str] = [
        "# Auto-generated thunks; DO NOT EDIT.",
        '#include "macros.S"',
        ".text",
    ]
    emit_guest_to_host_thunks(asm_lines, opts.dll, funcs, arch)
    emit_host_to_guest_thunks(asm_lines, typedefs, arch)

    asm = "\n".join(asm_lines) + "\n"
    hdr = emit_header_mapping(opts.dll, funcs, typedefs, variables, arch)

    # Create both output directories before writing either file
    for out_path in (opts.out_asm, opts.out_hdr):
        out_path.parent.mkdir(parents=True, exist_ok=True)
    opts.out_asm.write_text(asm)
    opts.out_hdr.write_text(hdr)
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|