Standardize the values for invalid and data opcodes (#261)

* Standardize the value for an invalid opcode
>
> This makes all architectures share the same value for an invalid opcode, so platform-specific logic isn't needed to check whether an instruction is valid.
Also updates dependencies.

* Standardize the value for OPCODE_DATA too
This commit is contained in:
Ethan Roseman
2025-09-14 03:33:06 +09:00
committed by GitHub
parent e6035b00df
commit e2c70342c9
11 changed files with 440 additions and 339 deletions

View File

@@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, ObjectSymbol as _, el
use unarm::{args, arm, thumb};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, OPCODE_INVALID, RelocationOverride, RelocationOverrideTarget},
diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@@ -164,7 +164,7 @@ impl ArchArm {
}
_ => bail!("Invalid instruction size {}", ins_ref.size),
};
let (ins, parsed_ins) = if ins_ref.opcode == u16::MAX {
let (ins, parsed_ins) = if ins_ref.opcode == OPCODE_DATA {
let mut args = args::Arguments::default();
args[0] = args::Argument::UImm(code);
let mnemonic = if ins_ref.size == 4 { ".word" } else { ".hword" };
@@ -238,7 +238,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: data.len() as u8,
opcode: u16::MAX,
opcode: OPCODE_DATA,
branch_dest: None,
});
break;
@@ -257,7 +257,7 @@ impl Arch for ArchArm {
ops.push(InstructionRef {
address: address as u64,
size: ins_size as u8,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
address += ins_size as u32;
@@ -318,7 +318,7 @@ impl Arch for ArchArm {
};
(opcode, branch_dest)
}
unarm::ParseMode::Data => (u16::MAX, None),
unarm::ParseMode::Data => (OPCODE_DATA, None),
};
ops.push(InstructionRef {

View File

@@ -14,7 +14,7 @@ use yaxpeax_arm::armv8::a64::{
};
use crate::{
arch::Arch,
arch::{Arch, OPCODE_INVALID},
diff::{DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
@@ -60,7 +60,7 @@ impl Arch for ArchArm64 {
ops.push(InstructionRef {
address,
size: 4,
opcode: u16::MAX,
opcode: OPCODE_INVALID,
branch_dest: None,
});
continue;
@@ -87,7 +87,7 @@ impl Arch for ArchArm64 {
let decoder = InstDecoder::default();
let mut ins = Instruction::default();
if decoder.decode_into(&mut ins, &mut reader).is_err() {
cb(InstructionPart::opcode("<invalid>", u16::MAX))?;
cb(InstructionPart::opcode("<invalid>", OPCODE_INVALID))?;
return Ok(());
}
@@ -2295,7 +2295,7 @@ where Cb: FnMut(InstructionPart<'static>) {
// Opcode is #[repr(u16)], but the tuple variants negate that, so we have to do this instead.
const fn opcode_to_u16(opcode: Opcode) -> u16 {
match opcode {
Opcode::Invalid => u16::MAX,
Opcode::Invalid => OPCODE_INVALID,
Opcode::UDF => 0,
Opcode::MOVN => 1,
Opcode::MOVK => 2,

View File

@@ -41,6 +41,9 @@ pub mod superh;
#[cfg(feature = "x86")]
pub mod x86;
/// Opcode value marking an instruction that could not be decoded. Shared by all
/// architectures so validity checks need no platform-specific logic.
pub const OPCODE_INVALID: u16 = u16::MAX;
/// Opcode value marking inline data emitted in place of an instruction
/// (displayed with directives such as `.word` or `.byte`).
pub const OPCODE_DATA: u16 = u16::MAX - 1;
/// Represents the type of data associated with an instruction
#[derive(PartialEq)]
pub enum DataType {

View File

@@ -9,7 +9,7 @@ use iced_x86::{
use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
arch::{Arch, OPCODE_DATA, RelocationOverride, RelocationOverrideTarget},
diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
};
@@ -89,8 +89,6 @@ impl ArchX86 {
}
}
const DATA_OPCODE: u16 = u16::MAX - 1;
impl Arch for ArchX86 {
fn scan_instructions_internal(
&self,
@@ -121,7 +119,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address,
size: size as u8,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
@@ -148,7 +146,7 @@ impl Arch for ArchX86 {
out.push(InstructionRef {
address: indirect_array_address + i as u64,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
});
}
@@ -187,14 +185,14 @@ impl Arch for ArchX86 {
diff_config: &DiffObjConfig,
cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
) -> Result<()> {
if resolved.ins_ref.opcode == DATA_OPCODE {
if resolved.ins_ref.opcode == OPCODE_DATA {
let (mnemonic, imm) = match resolved.ins_ref.size {
1 => (".byte", resolved.code[0] as u64),
2 => (".word", self.endianness.read_u16_bytes(resolved.code.try_into()?) as u64),
4 => (".dword", self.endianness.read_u32_bytes(resolved.code.try_into()?) as u64),
_ => bail!("Unsupported x86 inline data size {}", resolved.ins_ref.size),
};
cb(InstructionPart::opcode(mnemonic, DATA_OPCODE))?;
cb(InstructionPart::opcode(mnemonic, OPCODE_DATA))?;
if resolved.relocation.is_some() {
cb(InstructionPart::reloc())?;
} else {
@@ -836,7 +834,7 @@ mod test {
ins_ref: InstructionRef {
address: 0x1234,
size: 1,
opcode: DATA_OPCODE,
opcode: OPCODE_DATA,
branch_dest: None,
},
code: &code,
@@ -850,7 +848,7 @@ mod test {
)
.unwrap();
assert_eq!(parts, &[
InstructionPart::opcode(".byte", DATA_OPCODE),
InstructionPart::opcode(".byte", OPCODE_DATA),
InstructionPart::unsigned(0xABu64),
]);
}