WIP objdiff 3.0 refactor

This commit is contained in:
2025-02-20 17:48:00 -07:00
parent 6d3c63ccd8
commit f3c157ff06
79 changed files with 14886 additions and 6820 deletions

View File

@@ -7,33 +7,93 @@ use alloc::{
vec,
vec::Vec,
};
use std::ops::Range;
use anyhow::{anyhow, bail, ensure, Result};
use iced_x86::{
Decoder, DecoderOptions, DecoratorKind, Formatter, FormatterOutput, FormatterTextKind,
GasFormatter, Instruction, IntelFormatter, MasmFormatter, NasmFormatter, NumberKind, OpKind,
PrefixKind, Register,
Decoder, DecoderOptions, DecoratorKind, FormatterOutput, FormatterTextKind, GasFormatter,
Instruction, IntelFormatter, MasmFormatter, NasmFormatter, NumberKind, OpKind, PrefixKind,
Register,
};
use object::{pe, Endian, Endianness, File, Object, Relocation, RelocationFlags};
use object::{pe, Endian as _, Object as _, ObjectSection as _};
use crate::{
arch::{ObjArch, ProcessCodeResult},
diff::{DiffObjConfig, X86Formatter},
obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection},
arch::Arch,
diff::{display::InstructionPart, DiffObjConfig, X86Formatter},
obj::{
InstructionArg, InstructionArgValue, InstructionRef, ParsedInstruction, RelocationFlags,
ResolvedRelocation, ScannedInstruction,
},
};
pub struct ObjArchX86 {
#[derive(Debug)]
pub struct ArchX86 {
bits: u32,
endianness: Endianness,
endianness: object::Endianness,
}
impl ObjArchX86 {
pub fn new(object: &File) -> Result<Self> {
impl ArchX86 {
pub fn new(object: &object::File) -> Result<Self> {
Ok(Self { bits: if object.is_64() { 64 } else { 32 }, endianness: object.endianness() })
}
fn formatter(&self, diff_config: &DiffObjConfig) -> Box<dyn iced_x86::Formatter> {
let mut formatter: Box<dyn iced_x86::Formatter> = match diff_config.x86_formatter {
X86Formatter::Intel => Box::new(IntelFormatter::new()),
X86Formatter::Gas => Box::new(GasFormatter::new()),
X86Formatter::Nasm => Box::new(NasmFormatter::new()),
X86Formatter::Masm => Box::new(MasmFormatter::new()),
};
formatter.options_mut().set_space_after_operand_separator(diff_config.space_between_args);
formatter
}
}
impl ObjArch for ObjArchX86 {
impl Arch for ArchX86 {
fn scan_instructions(
&self,
address: u64,
code: &[u8],
_section_index: usize,
_diff_config: &DiffObjConfig,
) -> Result<Vec<ScannedInstruction>> {
let mut out = Vec::with_capacity(code.len() / 2);
let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE);
let mut instruction = Instruction::default();
while decoder.can_decode() {
decoder.decode_out(&mut instruction);
// TODO is this right?
let branch_dest = match instruction.op0_kind() {
OpKind::NearBranch16 => Some(instruction.near_branch16() as u64),
OpKind::NearBranch32 => Some(instruction.near_branch32() as u64),
OpKind::NearBranch64 => Some(instruction.near_branch64()),
_ => None,
};
out.push(ScannedInstruction {
ins_ref: InstructionRef {
address: instruction.ip(),
size: instruction.len() as u8,
opcode: instruction.mnemonic() as u16,
},
branch_dest,
});
}
Ok(out)
}
fn display_instruction(
&self,
ins_ref: InstructionRef,
code: &[u8],
relocation: Option<ResolvedRelocation>,
function_range: Range<u64>,
section_index: usize,
diff_config: &DiffObjConfig,
cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
) -> Result<()> {
todo!()
}
fn process_code(
&self,
address: u64,
@@ -45,13 +105,7 @@ impl ObjArch for ObjArchX86 {
) -> Result<ProcessCodeResult> {
let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() };
let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE);
let mut formatter: Box<dyn Formatter> = match config.x86_formatter {
X86Formatter::Intel => Box::new(IntelFormatter::new()),
X86Formatter::Gas => Box::new(GasFormatter::new()),
X86Formatter::Nasm => Box::new(NasmFormatter::new()),
X86Formatter::Masm => Box::new(MasmFormatter::new()),
};
formatter.options_mut().set_space_after_operand_separator(config.space_between_args);
let mut formatter = self.formatter(config);
let mut output = InstructionFormatterOutput {
formatted: String::new(),
@@ -101,10 +155,12 @@ impl ObjArch for ObjArchX86 {
output.ins.formatted.clone_from(&output.formatted);
// Make sure we've put the relocation somewhere in the instruction
if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) {
if reloc.is_some()
&& !output.ins.args.iter().any(|a| matches!(a, InstructionArg::Reloc))
{
let mut found = replace_arg(
OpKind::Memory,
ObjInsArg::Reloc,
InstructionArg::Reloc,
&mut output.ins.args,
&instruction,
&output.ins_operands,
@@ -112,7 +168,7 @@ impl ObjArch for ObjArchX86 {
if !found {
found = replace_arg(
OpKind::Immediate32,
ObjInsArg::Reloc,
InstructionArg::Reloc,
&mut output.ins.args,
&instruction,
&output.ins_operands,
@@ -120,7 +176,9 @@ impl ObjArch for ObjArchX86 {
}
ensure!(found, "x86: Failed to find operand for Absolute relocation");
}
if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) {
if reloc.is_some()
&& !output.ins.args.iter().any(|a| matches!(a, InstructionArg::Reloc))
{
bail!("Failed to find relocation in instruction");
}
@@ -136,14 +194,15 @@ impl ObjArch for ObjArchX86 {
fn implcit_addend(
&self,
_file: &File<'_>,
section: &ObjSection,
_file: &object::File<'_>,
section: &object::Section,
address: u64,
reloc: &Relocation,
_relocation: &object::Relocation,
flags: RelocationFlags,
) -> Result<i64> {
match reloc.flags() {
RelocationFlags::Coff { typ: pe::IMAGE_REL_I386_DIR32 | pe::IMAGE_REL_I386_REL32 } => {
let data = section.data[address as usize..address as usize + 4].try_into()?;
match flags {
RelocationFlags::Coff(pe::IMAGE_REL_I386_DIR32 | pe::IMAGE_REL_I386_REL32) => {
let data = section.data()[address as usize..address as usize + 4].try_into()?;
Ok(self.endianness.read_i32_bytes(data) as i64)
}
flags => bail!("Unsupported x86 implicit relocation {flags:?}"),
@@ -162,7 +221,7 @@ impl ObjArch for ObjArchX86 {
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
match flags {
RelocationFlags::Coff { typ } => match typ {
RelocationFlags::Coff(typ) => match typ {
pe::IMAGE_REL_I386_DIR32 => Cow::Borrowed("IMAGE_REL_I386_DIR32"),
pe::IMAGE_REL_I386_REL32 => Cow::Borrowed("IMAGE_REL_I386_REL32"),
_ => Cow::Owned(format!("<{flags:?}>")),
@@ -173,7 +232,7 @@ impl ObjArch for ObjArchX86 {
fn get_reloc_byte_size(&self, flags: RelocationFlags) -> usize {
match flags {
RelocationFlags::Coff { typ } => match typ {
RelocationFlags::Coff(typ) => match typ {
pe::IMAGE_REL_I386_DIR16 => 2,
pe::IMAGE_REL_I386_REL16 => 2,
pe::IMAGE_REL_I386_DIR32 => 4,
@@ -187,8 +246,8 @@ impl ObjArch for ObjArchX86 {
fn replace_arg(
from: OpKind,
to: ObjInsArg,
args: &mut [ObjInsArg],
to: InstructionArg,
args: &mut [InstructionArg],
instruction: &Instruction,
ins_operands: &[Option<u32>],
) -> Result<bool> {
@@ -213,7 +272,7 @@ fn replace_arg(
struct InstructionFormatterOutput {
formatted: String,
ins: ObjIns,
ins: ParsedInstruction,
error: Option<anyhow::Error>,
ins_operands: Vec<Option<u32>>,
}
@@ -223,11 +282,13 @@ impl InstructionFormatterOutput {
// The formatter writes the '-' operator and then gives us a negative value,
// so convert it to a positive value to avoid double negatives
if value < 0
&& matches!(self.ins.args.last(), Some(ObjInsArg::Arg(ObjInsArgValue::Opaque(v))) if v == "-")
&& matches!(self.ins.args.last(), Some(InstructionArg::Value(InstructionArgValue::Opaque(v))) if v == "-")
{
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(value.wrapping_abs())));
self.ins
.args
.push(InstructionArg::Value(InstructionArgValue::Signed(value.wrapping_abs())));
} else {
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(value)));
self.ins.args.push(InstructionArg::Value(InstructionArgValue::Signed(value)));
}
}
}
@@ -242,10 +303,12 @@ impl FormatterOutput for InstructionFormatterOutput {
self.ins_operands.push(None);
match kind {
FormatterTextKind::Text | FormatterTextKind::Punctuation => {
self.ins.args.push(ObjInsArg::PlainText(text.to_string().into()));
self.ins.args.push(InstructionArg::PlainText(text.to_string().into()));
}
FormatterTextKind::Keyword | FormatterTextKind::Operator => {
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(text.to_string().into())));
self.ins.args.push(InstructionArg::Value(InstructionArgValue::Opaque(
text.to_string().into(),
)));
}
_ => {
if self.error.is_none() {
@@ -258,12 +321,13 @@ impl FormatterOutput for InstructionFormatterOutput {
fn write_prefix(&mut self, _instruction: &Instruction, text: &str, _prefix: PrefixKind) {
self.formatted.push_str(text);
self.ins_operands.push(None);
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(text.to_string().into())));
self.ins
.args
.push(InstructionArg::Value(InstructionArgValue::Opaque(text.to_string().into())));
}
fn write_mnemonic(&mut self, _instruction: &Instruction, text: &str) {
self.formatted.push_str(text);
// TODO: can iced-x86 guarantee 'static here?
self.ins.mnemonic = Cow::Owned(text.to_string());
}
@@ -284,10 +348,11 @@ impl FormatterOutput for InstructionFormatterOutput {
match kind {
FormatterTextKind::LabelAddress => {
if let Some(reloc) = self.ins.reloc.as_ref() {
if matches!(reloc.flags, RelocationFlags::Coff {
typ: pe::IMAGE_REL_I386_DIR32 | pe::IMAGE_REL_I386_REL32
}) {
self.ins.args.push(ObjInsArg::Reloc);
if matches!(
reloc.flags,
RelocationFlags::Coff(pe::IMAGE_REL_I386_DIR32 | pe::IMAGE_REL_I386_REL32)
) {
self.ins.args.push(InstructionArg::Reloc);
return;
} else if self.error.is_none() {
self.error = Some(anyhow!(
@@ -296,16 +361,14 @@ impl FormatterOutput for InstructionFormatterOutput {
));
}
}
self.ins.args.push(ObjInsArg::BranchDest(value));
self.ins.args.push(InstructionArg::BranchDest(value));
self.ins.branch_dest = Some(value);
return;
}
FormatterTextKind::FunctionAddress => {
if let Some(reloc) = self.ins.reloc.as_ref() {
if matches!(reloc.flags, RelocationFlags::Coff {
typ: pe::IMAGE_REL_I386_REL32
}) {
self.ins.args.push(ObjInsArg::Reloc);
if matches!(reloc.flags, RelocationFlags::Coff(pe::IMAGE_REL_I386_REL32)) {
self.ins.args.push(InstructionArg::Reloc);
return;
} else if self.error.is_none() {
self.error = Some(anyhow!(
@@ -332,7 +395,7 @@ impl FormatterOutput for InstructionFormatterOutput {
self.push_signed(value as i64);
}
NumberKind::UInt8 | NumberKind::UInt16 | NumberKind::UInt32 | NumberKind::UInt64 => {
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(value)));
self.ins.args.push(InstructionArg::Value(InstructionArgValue::Unsigned(value)));
}
}
}
@@ -347,7 +410,7 @@ impl FormatterOutput for InstructionFormatterOutput {
) {
self.formatted.push_str(text);
self.ins_operands.push(instruction_operand);
self.ins.args.push(ObjInsArg::PlainText(text.to_string().into()));
self.ins.args.push(InstructionArg::PlainText(text.to_string().into()));
}
fn write_register(
@@ -360,6 +423,8 @@ impl FormatterOutput for InstructionFormatterOutput {
) {
self.formatted.push_str(text);
self.ins_operands.push(instruction_operand);
self.ins.args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(text.to_string().into())));
self.ins
.args
.push(InstructionArg::Value(InstructionArgValue::Opaque(text.to_string().into())));
}
}