Standardize the values for invalid and data opcodes (#261)

* Standardize the value for an invalid opcode
>
> This makes it so that all arches share the same value for an invalid opcode, so platform-specific logic isn't needed for checking whether instructions are valid.
Also updated dependencies

* OPCODE_DATA too
This commit is contained in:
Ethan Roseman
2025-09-14 03:33:06 +09:00
committed by GitHub
parent e6035b00df
commit e2c70342c9
11 changed files with 440 additions and 339 deletions

698
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, ObjectSymbol as _, el
use unarm::{args, arm, thumb};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, OPCODE_INVALID, RelocationOverride, RelocationOverrideTarget},
diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@@ -164,7 +164,7 @@ impl ArchArm {
}
_ => bail!("Invalid instruction size {}", ins_ref.size),
};
let (ins, parsed_ins) = if ins_ref.opcode == u16::MAX {
let (ins, parsed_ins) = if ins_ref.opcode == OPCODE_DATA {
let mut args = args::Arguments::default();
args[0] = args::Argument::UImm(code);
let mnemonic = if ins_ref.size == 4 { ".word" } else { ".hword" };
@@ -238,7 +238,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: data.len() as u8,
opcode: u16::MAX,
opcode: OPCODE_DATA,
branch_dest: None,
});
break;
@@ -257,7 +257,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: ins_size as u8,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
address += ins_size as u32;
@@ -318,7 +318,7 @@ impl Arch for ArchArm {
};
(opcode, branch_dest)
}
unarm::ParseMode::Data => (u16::MAX, None),
unarm::ParseMode::Data => (OPCODE_DATA, None),
};
ops.push(InstructionRef {

View File

@@ -14,7 +14,7 @@ use yaxpeax_arm::armv8::a64::{
};
use crate::{
arch::Arch,
arch::{Arch, OPCODE_INVALID},
diff::{DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@@ -60,7 +60,7 @@ impl Arch for ArchArm64 {
ops.push(InstructionRef {
address,
size: 4,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
continue;
@@ -87,7 +87,7 @@ impl Arch for ArchArm64 {
let decoder = InstDecoder::default();
let mut ins = Instruction::default();
if decoder.decode_into(&mut ins, &mut reader).is_err() {
cb(InstructionPart::opcode("<invalid>", u16::MAX))?;
cb(InstructionPart::opcode("<invalid>", OPCODE_INVALID))?;
return Ok(());
}
@@ -2295,7 +2295,7 @@ where Cb: FnMut(InstructionPart<'static>) {
// Opcode is #[repr(u16)], but the tuple variants negate that, so we have to do this instead.
const fn opcode_to_u16(opcode: Opcode) -> u16 {
match opcode {
Opcode::Invalid => u16::MAX,
Opcode::Invalid => OPCODE_INVALID,
Opcode::UDF => 0,
Opcode::MOVN => 1,
Opcode::MOVK => 2,

View File

@@ -41,6 +41,9 @@ pub mod superh;
#[cfg(feature = "x86")]
pub mod x86;
pub const OPCODE_INVALID: u16 = u16::MAX;
pub const OPCODE_DATA: u16 = u16::MAX - 1;
/// Represents the type of data associated with an instruction
#[derive(PartialEq)]
pub enum DataType {

View File

@@ -9,7 +9,7 @@ use iced_x86::{
use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, RelocationOverride, RelocationOverrideTarget},
diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
};
@@ -89,8 +89,6 @@ impl ArchX86 {
}
}
const DATA_OPCODE: u16 = u16::MAX - 1;
impl Arch for ArchX86 {
fn scan_instructions_internal(
&self,
@@ -121,7 +119,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address,
size: size as u8,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
@@ -148,7 +146,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address: indirect_array_address + i as u64,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
}
@@ -187,14 +185,14 @@ impl Arch for ArchX86 {
diff_config: &DiffObjConfig,
cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
) -> Result<()> {
if resolved.ins_ref.opcode == DATA_OPCODE {
if resolved.ins_ref.opcode == OPCODE_DATA {
let (mnemonic, imm) = match resolved.ins_ref.size {
1 => (".byte", resolved.code[0] as u64),
2 => (".word", self.endianness.read_u16_bytes(resolved.code.try_into()?) as u64),
4 => (".dword", self.endianness.read_u32_bytes(resolved.code.try_into()?) as u64),
_ => bail!("Unsupported x86 inline data size {}", resolved.ins_ref.size),
};
cb(InstructionPart::opcode(mnemonic, DATA_OPCODE))?;
cb(InstructionPart::opcode(mnemonic, OPCODE_DATA))?;
if resolved.relocation.is_some() {
cb(InstructionPart::reloc())?;
} else {
@@ -836,7 +834,7 @@ mod test {
ins_ref: InstructionRef {
address: 0x1234,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
},
code: &code,
@@ -850,7 +848,7 @@ mod test {
)
.unwrap();
assert_eq!(parts, &[
InstructionPart::opcode(".byte", DATA_OPCODE),
InstructionPart::opcode(".byte", OPCODE_DATA),
InstructionPart::unsigned(0xABu64),
]);
}

View File

@@ -4,4 +4,4 @@ expression: output
---
[(Line(90), Dim, 5), (Address(0), Dim, 5), (Spacing(4), Normal, 0), (Opcode("ldr", 32799), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(0)), Normal, 0), (Basic("]"), Normal, 0), (Basic(" (->"), Normal, 0), (BranchDest(8), Normal, 0), (Basic(")"), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(4), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bx", 32779), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@@ -36,7 +36,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 84,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@@ -1829,7 +1829,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 460,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1843,7 +1843,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 464,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1864,7 +1864,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 468,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@@ -107,6 +107,6 @@ expression: output
[(Address(408), Dim, 5), (Basic(" ~> "), Rotating(16), 0), (Opcode("mov", 32818), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Unsigned(0)), Normal, 0), (Eol, Normal, 0)]
[(Address(412), Dim, 5), (Spacing(4), Normal, 0), (Opcode("strb", 32899), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(38)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(416), Dim, 5), (Spacing(4), Normal, 0), (Opcode("ldmia", 32793), Normal, 10), (Argument(Opaque("sp")), Normal, 0), (Argument(Opaque("!")), Normal, 0), (Basic(", "), Normal, 0), (Basic("{"), Normal, 0), (Argument(Opaque("r4")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r6")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic("}"), Normal, 0), (Eol, Normal, 0)]
[(Address(420), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(424), Dim, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(428), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(420), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(424), Dim, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(428), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@@ -1512,7 +1512,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 216,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1526,7 +1526,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 220,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1540,7 +1540,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 224,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1554,7 +1554,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 228,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1568,7 +1568,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 232,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1582,7 +1582,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 236,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@@ -1596,7 +1596,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 240,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@@ -101,10 +101,10 @@ expression: output
[(Line(81), Dim, 5), (Address(208), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bl", 19), Normal, 10), (Symbol(Symbol { name: "HEManager_PopState", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Addend(-4), Bright, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(212), Dim, 5), (Basic(" ~> "), Rotating(0), 0), (Opcode("add", 7), Normal, 10), (Argument(Opaque("sp")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(16)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(214), Dim, 5), (Spacing(4), Normal, 0), (Opcode("pop", 55), Normal, 10), (Basic("{"), Normal, 0), (Argument(Opaque("r3")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r4")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r6")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r7")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic("}"), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(216), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(285)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(220), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1192)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(224), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(7544)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(228), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9103)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(232), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1930)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(236), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(4294901760)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(240), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9129)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(216), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(285)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(220), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1192)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(224), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(7544)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(228), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9103)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(232), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1930)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(236), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(4294901760)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(240), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9129)), Normal, 0), (Eol, Normal, 0)]