Standardize the values for invalid and data opcodes (#261)

* Standardize the value for an invalid opcode
>
> This makes it so that all arches share the same value for an invalid opcode, so platform-specific logic isn't needed for checking whether instructions are valid.
Also updated dependencies

* OPCODE_DATA too
This commit is contained in:
Ethan Roseman 2025-09-14 03:33:06 +09:00 committed by GitHub
parent e6035b00df
commit e2c70342c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 440 additions and 339 deletions

698
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, ObjectSymbol as _, el
use unarm::{args, arm, thumb};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, OPCODE_INVALID, RelocationOverride, RelocationOverrideTarget},
diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@ -164,7 +164,7 @@ impl ArchArm {
}
_ => bail!("Invalid instruction size {}", ins_ref.size),
};
let (ins, parsed_ins) = if ins_ref.opcode == u16::MAX {
let (ins, parsed_ins) = if ins_ref.opcode == OPCODE_DATA {
let mut args = args::Arguments::default();
args[0] = args::Argument::UImm(code);
let mnemonic = if ins_ref.size == 4 { ".word" } else { ".hword" };
@ -238,7 +238,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: data.len() as u8,
opcode: u16::MAX,
opcode: OPCODE_DATA,
branch_dest: None,
});
break;
@ -257,7 +257,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: ins_size as u8,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
address += ins_size as u32;
@ -318,7 +318,7 @@ impl Arch for ArchArm {
};
(opcode, branch_dest)
}
unarm::ParseMode::Data => (u16::MAX, None),
unarm::ParseMode::Data => (OPCODE_DATA, None),
};
ops.push(InstructionRef {

View File

@ -14,7 +14,7 @@ use yaxpeax_arm::armv8::a64::{
};
use crate::{
arch::Arch,
arch::{Arch, OPCODE_INVALID},
diff::{DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@ -60,7 +60,7 @@ impl Arch for ArchArm64 {
ops.push(InstructionRef {
address,
size: 4,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
continue;
@ -87,7 +87,7 @@ impl Arch for ArchArm64 {
let decoder = InstDecoder::default();
let mut ins = Instruction::default();
if decoder.decode_into(&mut ins, &mut reader).is_err() {
cb(InstructionPart::opcode("<invalid>", u16::MAX))?;
cb(InstructionPart::opcode("<invalid>", OPCODE_INVALID))?;
return Ok(());
}
@ -2295,7 +2295,7 @@ where Cb: FnMut(InstructionPart<'static>) {
// Opcode is #[repr(u16)], but the tuple variants negate that, so we have to do this instead.
const fn opcode_to_u16(opcode: Opcode) -> u16 {
match opcode {
Opcode::Invalid => u16::MAX,
Opcode::Invalid => OPCODE_INVALID,
Opcode::UDF => 0,
Opcode::MOVN => 1,
Opcode::MOVK => 2,

View File

@ -41,6 +41,9 @@ pub mod superh;
#[cfg(feature = "x86")]
pub mod x86;
pub const OPCODE_INVALID: u16 = u16::MAX;
pub const OPCODE_DATA: u16 = u16::MAX - 1;
/// Represents the type of data associated with an instruction
#[derive(PartialEq)]
pub enum DataType {

View File

@ -9,7 +9,7 @@ use iced_x86::{
use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, RelocationOverride, RelocationOverrideTarget},
diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
};
@ -89,8 +89,6 @@ impl ArchX86 {
}
}
const DATA_OPCODE: u16 = u16::MAX - 1;
impl Arch for ArchX86 {
fn scan_instructions_internal(
&self,
@ -121,7 +119,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address,
size: size as u8,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
@ -148,7 +146,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address: indirect_array_address + i as u64,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
}
@ -187,14 +185,14 @@ impl Arch for ArchX86 {
diff_config: &DiffObjConfig,
cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
) -> Result<()> {
if resolved.ins_ref.opcode == DATA_OPCODE {
if resolved.ins_ref.opcode == OPCODE_DATA {
let (mnemonic, imm) = match resolved.ins_ref.size {
1 => (".byte", resolved.code[0] as u64),
2 => (".word", self.endianness.read_u16_bytes(resolved.code.try_into()?) as u64),
4 => (".dword", self.endianness.read_u32_bytes(resolved.code.try_into()?) as u64),
_ => bail!("Unsupported x86 inline data size {}", resolved.ins_ref.size),
};
cb(InstructionPart::opcode(mnemonic, DATA_OPCODE))?;
cb(InstructionPart::opcode(mnemonic, OPCODE_DATA))?;
if resolved.relocation.is_some() {
cb(InstructionPart::reloc())?;
} else {
@ -836,7 +834,7 @@ mod test {
ins_ref: InstructionRef {
address: 0x1234,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
},
code: &code,
@ -850,7 +848,7 @@ mod test {
)
.unwrap();
assert_eq!(parts, &[
InstructionPart::opcode(".byte", DATA_OPCODE),
InstructionPart::opcode(".byte", OPCODE_DATA),
InstructionPart::unsigned(0xABu64),
]);
}

View File

@ -4,4 +4,4 @@ expression: output
---
[(Line(90), Dim, 5), (Address(0), Dim, 5), (Spacing(4), Normal, 0), (Opcode("ldr", 32799), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(0)), Normal, 0), (Basic("]"), Normal, 0), (Basic(" (->"), Normal, 0), (BranchDest(8), Normal, 0), (Basic(")"), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(4), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bx", 32779), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@ -36,7 +36,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 84,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@ -1829,7 +1829,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 460,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1843,7 +1843,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 464,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1864,7 +1864,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 468,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@ -107,6 +107,6 @@ expression: output
[(Address(408), Dim, 5), (Basic(" ~> "), Rotating(16), 0), (Opcode("mov", 32818), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Unsigned(0)), Normal, 0), (Eol, Normal, 0)]
[(Address(412), Dim, 5), (Spacing(4), Normal, 0), (Opcode("strb", 32899), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(38)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(416), Dim, 5), (Spacing(4), Normal, 0), (Opcode("ldmia", 32793), Normal, 10), (Argument(Opaque("sp")), Normal, 0), (Argument(Opaque("!")), Normal, 0), (Basic(", "), Normal, 0), (Basic("{"), Normal, 0), (Argument(Opaque("r4")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r6")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic("}"), Normal, 0), (Eol, Normal, 0)]
[(Address(420), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(424), Dim, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(428), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(420), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(424), Dim, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
[(Address(428), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@ -1512,7 +1512,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 216,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1526,7 +1526,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 220,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1540,7 +1540,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 224,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1554,7 +1554,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 228,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1568,7 +1568,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 232,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1582,7 +1582,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 236,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),
@ -1596,7 +1596,7 @@ expression: diff.instruction_rows
InstructionRef {
address: 240,
size: 4,
opcode: 65535,
opcode: 65534,
branch_dest: None,
},
),

View File

@ -101,10 +101,10 @@ expression: output
[(Line(81), Dim, 5), (Address(208), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bl", 19), Normal, 10), (Symbol(Symbol { name: "HEManager_PopState", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Addend(-4), Bright, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(212), Dim, 5), (Basic(" ~> "), Rotating(0), 0), (Opcode("add", 7), Normal, 10), (Argument(Opaque("sp")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(16)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(214), Dim, 5), (Spacing(4), Normal, 0), (Opcode("pop", 55), Normal, 10), (Basic("{"), Normal, 0), (Argument(Opaque("r3")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r4")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r6")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r7")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic("}"), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(216), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(285)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(220), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1192)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(224), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(7544)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(228), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9103)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(232), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1930)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(236), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(4294901760)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(240), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9129)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(216), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(285)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(220), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1192)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(224), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(7544)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(228), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9103)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(232), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(1930)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(236), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(4294901760)), Normal, 0), (Eol, Normal, 0)]
[(Line(86), Dim, 5), (Address(240), Dim, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65534), Normal, 10), (Basic("#"), Normal, 0), (Argument(Unsigned(9129)), Normal, 0), (Eol, Normal, 0)]